summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c1691
1 files changed, 906 insertions, 785 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index b8184617ca25..373374164bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
+#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
@@ -138,6 +139,13 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
+
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@@ -167,6 +175,7 @@ static const u32 golden_settings_tonga_a11[] =
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
mmTCC_CTRL, 0x00100000, 0xf31fff7f,
@@ -640,7 +649,6 @@ static const u32 stoney_mgcg_cgcg_init[] =
mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
- mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -688,6 +696,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
(const u32)ARRAY_SIZE(tonga_golden_common_all));
break;
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
amdgpu_program_register_sequence(adev,
golden_settings_polaris11_a11,
(const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -703,7 +712,10 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
polaris10_golden_common_all,
(const u32)ARRAY_SIZE(polaris10_golden_common_all));
WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
- if (adev->pdev->revision == 0xc7) {
+ if (adev->pdev->revision == 0xc7 &&
+ ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
+ (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
+ (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
}
@@ -795,7 +807,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
- struct fence *f = NULL;
+ struct dma_fence *f = NULL;
uint32_t scratch;
uint32_t tmp = 0;
long r;
@@ -821,7 +833,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
if (r)
goto err2;
- r = fence_wait_timeout(f, false, timeout);
+ r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
@@ -841,7 +853,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
err2:
amdgpu_ib_free(adev, &ib, NULL);
- fence_put(f);
+ dma_fence_put(f);
err1:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -899,6 +911,9 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_POLARIS10:
chip_name = "polaris10";
break;
+ case CHIP_POLARIS12:
+ chip_name = "polaris12";
+ break;
case CHIP_STONEY:
chip_name = "stoney";
break;
@@ -1055,6 +1070,19 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ /* we need account JT in */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
+
+ if (amdgpu_sriov_vf(adev)) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
+ info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
+ info->fw = adev->gfx.mec_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
+ }
+
if (adev->gfx.mec2_fw) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
@@ -1124,34 +1152,8 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
PACKET3_SET_CONTEXT_REG_START);
- switch (adev->asic_type) {
- case CHIP_TONGA:
- case CHIP_POLARIS10:
- buffer[count++] = cpu_to_le32(0x16000012);
- buffer[count++] = cpu_to_le32(0x0000002A);
- break;
- case CHIP_POLARIS11:
- buffer[count++] = cpu_to_le32(0x16000012);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_FIJI:
- buffer[count++] = cpu_to_le32(0x3a00161a);
- buffer[count++] = cpu_to_le32(0x0000002e);
- break;
- case CHIP_TOPAZ:
- case CHIP_CARRIZO:
- buffer[count++] = cpu_to_le32(0x00000002);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_STONEY:
- buffer[count++] = cpu_to_le32(0x00000000);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- default:
- buffer[count++] = cpu_to_le32(0x00000000);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- }
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
@@ -1233,10 +1235,9 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
if (adev->gfx.rlc.clear_state_obj) {
r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
if (unlikely(r != 0))
- dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
+ dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
adev->gfx.rlc.clear_state_obj = NULL;
}
@@ -1248,7 +1249,6 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
-
amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
adev->gfx.rlc.cp_table_obj = NULL;
}
@@ -1272,7 +1272,8 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
if (adev->gfx.rlc.clear_state_obj == NULL) {
r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL,
&adev->gfx.rlc.clear_state_obj);
if (r) {
@@ -1290,14 +1291,14 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
&adev->gfx.rlc.clear_state_gpu_addr);
if (r) {
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
+ dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
gfx_v8_0_rlc_fini(adev);
return r;
}
r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
if (r) {
- dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
+ dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
gfx_v8_0_rlc_fini(adev);
return r;
}
@@ -1314,7 +1315,8 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
if (adev->gfx.rlc.cp_table_obj == NULL) {
r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
NULL, NULL,
&adev->gfx.rlc.cp_table_obj);
if (r) {
@@ -1332,7 +1334,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
&adev->gfx.rlc.cp_table_gpu_addr);
if (r) {
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
- dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
+ dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
return r;
}
r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
@@ -1345,7 +1347,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
-
}
return 0;
@@ -1361,7 +1362,6 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
-
amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
adev->gfx.mec.hpd_eop_obj = NULL;
}
@@ -1384,7 +1384,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
- adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+ adev->gfx.mec.num_queue * MEC_HPD_SIZE,
PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
&adev->gfx.mec.hpd_eop_obj);
@@ -1413,7 +1413,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
return r;
}
- memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+ memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -1576,7 +1576,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
struct amdgpu_ib ib;
- struct fence *f = NULL;
+ struct dma_fence *f = NULL;
int r, i;
u32 tmp;
unsigned total_size, vgpr_offset, sgpr_offset;
@@ -1709,7 +1709,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
}
/* wait for the GPU to finish processing the IB */
- r = fence_wait(f, false);
+ r = dma_fence_wait(f, false);
if (r) {
DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
goto fail;
@@ -1730,7 +1730,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
fail:
amdgpu_ib_free(adev, &ib, NULL);
- fence_put(f);
+ dma_fence_put(f);
return r;
}
@@ -1779,6 +1779,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
ret = amdgpu_atombios_get_gfx_info(adev);
if (ret)
return ret;
@@ -2046,10 +2047,8 @@ static int gfx_v8_0_sw_init(void *handle)
ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
}
- r = amdgpu_ring_init(adev, ring, 1024,
- PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
- AMDGPU_RING_TYPE_GFX);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_EOP);
if (r)
return r;
}
@@ -2073,33 +2072,28 @@ static int gfx_v8_0_sw_init(void *handle)
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
/* type-2 packets are deprecated on MEC, use type-3 instead */
- r = amdgpu_ring_init(adev, ring, 1024,
- PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
- &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_TYPE_COMPUTE);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ irq_type);
if (r)
return r;
}
/* reserve GDS, GWS and OA resource for gfx */
- r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
- PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
- NULL, &adev->gds.gds_gfx_bo);
+ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
+ &adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
- r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
- PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
- NULL, &adev->gds.gws_gfx_bo);
+ r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
+ &adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
- r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
- PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_OA, 0, NULL,
- NULL, &adev->gds.oa_gfx_bo);
+ r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
+ &adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;
@@ -2117,9 +2111,9 @@ static int gfx_v8_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
- amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
+ amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -2127,9 +2121,7 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
gfx_v8_0_mec_fini(adev);
-
gfx_v8_0_rlc_fini(adev);
-
gfx_v8_0_free_microcode(adev);
return 0;
@@ -2702,6 +2694,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
break;
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
PIPE_CONFIG(ADDR_SURF_P4_16x16) |
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@@ -3465,19 +3458,16 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
else
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
- if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
- } else if (se_num == 0xffffffff) {
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+ if (se_num == 0xffffffff)
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
- } else if (sh_num == 0xffffffff) {
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+ else
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
- } else {
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+ else
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
- data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
- }
+
WREG32(mmGRBM_GFX_INDEX, data);
}
@@ -3490,11 +3480,10 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
u32 data, mask;
- data = RREG32(mmCC_RB_BACKEND_DISABLE);
- data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+ data = RREG32(mmCC_RB_BACKEND_DISABLE) |
+ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
- data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+ data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se);
@@ -3502,13 +3491,164 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
return (~data) & mask;
}
+static void
+gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
+ RB_XSEL2(1) | PKR_MAP(2) |
+ PKR_XSEL(1) | PKR_YSEL(1) |
+ SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
+ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
+ SE_PAIR_YSEL(2);
+ break;
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
+ SE_XSEL(1) | SE_YSEL(1);
+ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
+ SE_PAIR_YSEL(2);
+ break;
+ case CHIP_TOPAZ:
+ case CHIP_CARRIZO:
+ *rconf |= RB_MAP_PKR0(2);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
+ SE_XSEL(1) | SE_YSEL(1);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_STONEY:
+ *rconf |= 0x0;
+ *rconf1 |= 0x0;
+ break;
+ default:
+ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
+ break;
+ }
+}
+
+static void
+gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
+ u32 raster_config, u32 raster_config_1,
+ unsigned rb_mask, unsigned num_rb)
+{
+ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
+ unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
+ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
+ unsigned rb_per_se = num_rb / num_se;
+ unsigned se_mask[4];
+ unsigned se;
+
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
+ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
+ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
+
+ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+ (!se_mask[2] && !se_mask[3]))) {
+ raster_config_1 &= ~SE_PAIR_MAP_MASK;
+
+ if (!se_mask[0] && !se_mask[1]) {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
+ } else {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
+ }
+ }
+
+ for (se = 0; se < num_se; se++) {
+ unsigned raster_config_se = raster_config;
+ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
+ unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se &= ~SE_MAP_MASK;
+
+ if (!se_mask[idx]) {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
+ }
+ }
+
+ pkr0_mask &= rb_mask;
+ pkr1_mask &= rb_mask;
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
+ raster_config_se &= ~PKR_MAP_MASK;
+
+ if (!pkr0_mask) {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
+ } else {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
+ }
+ }
+
+ if (rb_per_se >= 2) {
+ unsigned rb0_mask = 1 << (se * rb_per_se);
+ unsigned rb1_mask = rb0_mask << 1;
+
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR0_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+
+ if (rb_per_se > 2) {
+ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
+ rb1_mask = rb0_mask << 1;
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR1_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+ }
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on VI */
+ gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on VI */
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+}
+
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
int i, j;
u32 data;
+ u32 raster_config = 0, raster_config_1 = 0;
u32 active_rbs = 0;
u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ unsigned num_rb_pipes;
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
@@ -3520,10 +3660,41 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
}
}
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
adev->gfx.config.num_rbs = hweight32(active_rbs);
+
+ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines, 16);
+
+ gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
+
+ if (!adev->gfx.config.backend_enable_mask ||
+ adev->gfx.config.num_rbs >= num_rb_pipes) {
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ } else {
+ gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
+ adev->gfx.config.backend_enable_mask,
+ num_rb_pipes);
+ }
+
+ /* cache the values for userspace */
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ adev->gfx.config.rb_config[i][j].rb_backend_disable =
+ RREG32(mmCC_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
+ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].raster_config =
+ RREG32(mmPA_SC_RASTER_CONFIG);
+ adev->gfx.config.rb_config[i][j].raster_config_1 =
+ RREG32(mmPA_SC_RASTER_CONFIG_1);
+ }
+ }
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
}
/**
@@ -3576,16 +3747,12 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- tmp = RREG32(mmGRBM_CNTL);
- tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
- WREG32(mmGRBM_CNTL, tmp);
-
+ WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
gfx_v8_0_tiling_mode_table_init(adev);
-
gfx_v8_0_setup_rb(adev);
gfx_v8_0_get_cu_info(adev);
@@ -3753,7 +3920,7 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
int list_size;
unsigned int *register_list_format =
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
- if (register_list_format == NULL)
+ if (!register_list_format)
return -ENOMEM;
memcpy(register_list_format, adev->gfx.rlc.register_list_format,
adev->gfx.rlc.reg_list_format_size_bytes);
@@ -3769,9 +3936,7 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
sizeof(indirect_start_offsets)/sizeof(int));
/* save and restore list */
- temp = RREG32(mmRLC_SRM_CNTL);
- temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
- WREG32(mmRLC_SRM_CNTL, temp);
+ WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
WREG32(mmRLC_SRM_ARAM_ADDR, 0);
for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
@@ -3798,8 +3963,12 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
data = mmRLC_SRM_INDEX_CNTL_DATA_0;
for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
- amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
- amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
+ if (unique_indices[i] != 0) {
+ amdgpu_mm_wreg(adev, temp + i,
+ unique_indices[i] & 0x3FFFF, false);
+ amdgpu_mm_wreg(adev, data + i,
+ unique_indices[i] >> 20, false);
+ }
}
kfree(register_list_format);
@@ -3808,155 +3977,94 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
- uint32_t data;
-
- data = RREG32(mmRLC_SRM_CNTL);
- data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
- WREG32(mmRLC_SRM_CNTL, data);
+ WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
uint32_t data;
- if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_GFX_SMG |
- AMD_PG_SUPPORT_GFX_DMG)) {
- data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
- data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
- data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
- WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
+ WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
- data = 0;
- data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
- data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
- data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
- data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
- WREG32(mmRLC_PG_DELAY, data);
+ data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
+ data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
+ data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
+ data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
+ WREG32(mmRLC_PG_DELAY, data);
- data = RREG32(mmRLC_PG_DELAY_2);
- data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
- data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
- WREG32(mmRLC_PG_DELAY_2, data);
+ WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
+ WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
- data = RREG32(mmRLC_AUTO_PG_CTRL);
- data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
- data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
- WREG32(mmRLC_AUTO_PG_CTRL, data);
- }
}
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
bool enable)
{
- u32 data, orig;
-
- orig = data = RREG32(mmRLC_PG_CNTL);
-
- if (enable)
- data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
- else
- data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
-
- if (orig != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
bool enable)
{
- u32 data, orig;
-
- orig = data = RREG32(mmRLC_PG_CNTL);
-
- if (enable)
- data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
- else
- data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
-
- if (orig != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
- u32 data, orig;
-
- orig = data = RREG32(mmRLC_PG_CNTL);
-
- if (enable)
- data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
- else
- data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
-
- if (orig != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
- if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_GFX_SMG |
- AMD_PG_SUPPORT_GFX_DMG |
- AMD_PG_SUPPORT_CP |
- AMD_PG_SUPPORT_GDS |
- AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_STONEY)) {
gfx_v8_0_init_csb(adev);
gfx_v8_0_init_save_restore_list(adev);
gfx_v8_0_enable_save_restore_machine(adev);
-
- if ((adev->asic_type == CHIP_CARRIZO) ||
- (adev->asic_type == CHIP_STONEY)) {
- WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
- gfx_v8_0_init_power_gating(adev);
- WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
- if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
- cz_enable_sck_slow_down_on_power_up(adev, true);
- cz_enable_sck_slow_down_on_power_down(adev, true);
- } else {
- cz_enable_sck_slow_down_on_power_up(adev, false);
- cz_enable_sck_slow_down_on_power_down(adev, false);
- }
- if (adev->pg_flags & AMD_PG_SUPPORT_CP)
- cz_enable_cp_power_gating(adev, true);
- else
- cz_enable_cp_power_gating(adev, false);
- } else if (adev->asic_type == CHIP_POLARIS11) {
- gfx_v8_0_init_power_gating(adev);
+ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ gfx_v8_0_init_power_gating(adev);
+ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+ if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+ cz_enable_sck_slow_down_on_power_up(adev, true);
+ cz_enable_sck_slow_down_on_power_down(adev, true);
+ } else {
+ cz_enable_sck_slow_down_on_power_up(adev, false);
+ cz_enable_sck_slow_down_on_power_down(adev, false);
}
+ if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+ cz_enable_cp_power_gating(adev, true);
+ else
+ cz_enable_cp_power_gating(adev, false);
+ } else if ((adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12)) {
+ gfx_v8_0_init_csb(adev);
+ gfx_v8_0_init_save_restore_list(adev);
+ gfx_v8_0_enable_save_restore_machine(adev);
+ gfx_v8_0_init_power_gating(adev);
}
+
}
-void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
+static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
- u32 tmp = RREG32(mmRLC_CNTL);
-
- tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
- WREG32(mmRLC_CNTL, tmp);
+ WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
gfx_v8_0_enable_gui_idle_interrupt(adev, false);
-
gfx_v8_0_wait_for_rlc_serdes(adev);
}
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
- u32 tmp = RREG32(mmGRBM_SOFT_RESET);
-
- tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
- WREG32(mmGRBM_SOFT_RESET, tmp);
+ WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
udelay(50);
- tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
- WREG32(mmGRBM_SOFT_RESET, tmp);
+
+ WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
udelay(50);
}
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
- u32 tmp = RREG32(mmRLC_CNTL);
-
- tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
- WREG32(mmRLC_CNTL, tmp);
+ WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
/* carrizo do enable cp interrupt after cp inited */
if (!(adev->flags & AMD_IS_APU))
@@ -3992,20 +4100,27 @@ static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
int r;
+ u32 tmp;
gfx_v8_0_rlc_stop(adev);
/* disable CG */
- WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
+ tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
+ tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
if (adev->asic_type == CHIP_POLARIS11 ||
- adev->asic_type == CHIP_POLARIS10)
- WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
+ adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS12) {
+ tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
+ tmp &= ~0x3;
+ WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
+ }
/* disable PG */
WREG32(mmRLC_PG_CNTL, 0);
gfx_v8_0_rlc_reset(adev);
-
gfx_v8_0_init_pg(adev);
if (!adev->pp_enabled) {
@@ -4184,6 +4299,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_write(ring, 0x0000002A);
break;
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
amdgpu_ring_write(ring, 0x16000012);
amdgpu_ring_write(ring, 0x00000000);
break;
@@ -4230,7 +4346,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
u32 tmp;
u32 rb_bufsz;
- u64 rb_addr, rptr_addr;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
int r;
/* Set the write pointer delay */
@@ -4261,6 +4377,9 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
+ WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
mdelay(1);
WREG32(mmCP_RB0_CNTL, tmp);
@@ -4300,12 +4419,10 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
gfx_v8_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
- if (r) {
+ if (r)
ring->ready = false;
- return r;
- }
- return 0;
+ return r;
}
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
@@ -4368,267 +4485,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
return 0;
}
-struct vi_mqd {
- uint32_t header; /* ordinal0 */
- uint32_t compute_dispatch_initiator; /* ordinal1 */
- uint32_t compute_dim_x; /* ordinal2 */
- uint32_t compute_dim_y; /* ordinal3 */
- uint32_t compute_dim_z; /* ordinal4 */
- uint32_t compute_start_x; /* ordinal5 */
- uint32_t compute_start_y; /* ordinal6 */
- uint32_t compute_start_z; /* ordinal7 */
- uint32_t compute_num_thread_x; /* ordinal8 */
- uint32_t compute_num_thread_y; /* ordinal9 */
- uint32_t compute_num_thread_z; /* ordinal10 */
- uint32_t compute_pipelinestat_enable; /* ordinal11 */
- uint32_t compute_perfcount_enable; /* ordinal12 */
- uint32_t compute_pgm_lo; /* ordinal13 */
- uint32_t compute_pgm_hi; /* ordinal14 */
- uint32_t compute_tba_lo; /* ordinal15 */
- uint32_t compute_tba_hi; /* ordinal16 */
- uint32_t compute_tma_lo; /* ordinal17 */
- uint32_t compute_tma_hi; /* ordinal18 */
- uint32_t compute_pgm_rsrc1; /* ordinal19 */
- uint32_t compute_pgm_rsrc2; /* ordinal20 */
- uint32_t compute_vmid; /* ordinal21 */
- uint32_t compute_resource_limits; /* ordinal22 */
- uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
- uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
- uint32_t compute_tmpring_size; /* ordinal25 */
- uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
- uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
- uint32_t compute_restart_x; /* ordinal28 */
- uint32_t compute_restart_y; /* ordinal29 */
- uint32_t compute_restart_z; /* ordinal30 */
- uint32_t compute_thread_trace_enable; /* ordinal31 */
- uint32_t compute_misc_reserved; /* ordinal32 */
- uint32_t compute_dispatch_id; /* ordinal33 */
- uint32_t compute_threadgroup_id; /* ordinal34 */
- uint32_t compute_relaunch; /* ordinal35 */
- uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
- uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
- uint32_t compute_wave_restore_control; /* ordinal38 */
- uint32_t reserved9; /* ordinal39 */
- uint32_t reserved10; /* ordinal40 */
- uint32_t reserved11; /* ordinal41 */
- uint32_t reserved12; /* ordinal42 */
- uint32_t reserved13; /* ordinal43 */
- uint32_t reserved14; /* ordinal44 */
- uint32_t reserved15; /* ordinal45 */
- uint32_t reserved16; /* ordinal46 */
- uint32_t reserved17; /* ordinal47 */
- uint32_t reserved18; /* ordinal48 */
- uint32_t reserved19; /* ordinal49 */
- uint32_t reserved20; /* ordinal50 */
- uint32_t reserved21; /* ordinal51 */
- uint32_t reserved22; /* ordinal52 */
- uint32_t reserved23; /* ordinal53 */
- uint32_t reserved24; /* ordinal54 */
- uint32_t reserved25; /* ordinal55 */
- uint32_t reserved26; /* ordinal56 */
- uint32_t reserved27; /* ordinal57 */
- uint32_t reserved28; /* ordinal58 */
- uint32_t reserved29; /* ordinal59 */
- uint32_t reserved30; /* ordinal60 */
- uint32_t reserved31; /* ordinal61 */
- uint32_t reserved32; /* ordinal62 */
- uint32_t reserved33; /* ordinal63 */
- uint32_t reserved34; /* ordinal64 */
- uint32_t compute_user_data_0; /* ordinal65 */
- uint32_t compute_user_data_1; /* ordinal66 */
- uint32_t compute_user_data_2; /* ordinal67 */
- uint32_t compute_user_data_3; /* ordinal68 */
- uint32_t compute_user_data_4; /* ordinal69 */
- uint32_t compute_user_data_5; /* ordinal70 */
- uint32_t compute_user_data_6; /* ordinal71 */
- uint32_t compute_user_data_7; /* ordinal72 */
- uint32_t compute_user_data_8; /* ordinal73 */
- uint32_t compute_user_data_9; /* ordinal74 */
- uint32_t compute_user_data_10; /* ordinal75 */
- uint32_t compute_user_data_11; /* ordinal76 */
- uint32_t compute_user_data_12; /* ordinal77 */
- uint32_t compute_user_data_13; /* ordinal78 */
- uint32_t compute_user_data_14; /* ordinal79 */
- uint32_t compute_user_data_15; /* ordinal80 */
- uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
- uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
- uint32_t reserved35; /* ordinal83 */
- uint32_t reserved36; /* ordinal84 */
- uint32_t reserved37; /* ordinal85 */
- uint32_t cp_mqd_query_time_lo; /* ordinal86 */
- uint32_t cp_mqd_query_time_hi; /* ordinal87 */
- uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
- uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
- uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
- uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
- uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
- uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
- uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
- uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
- uint32_t reserved38; /* ordinal96 */
- uint32_t reserved39; /* ordinal97 */
- uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
- uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
- uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
- uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
- uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
- uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
- uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
- uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
- uint32_t reserved40; /* ordinal106 */
- uint32_t reserved41; /* ordinal107 */
- uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
- uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
- uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
- uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
- uint32_t reserved42; /* ordinal112 */
- uint32_t reserved43; /* ordinal113 */
- uint32_t cp_pq_exe_status_lo; /* ordinal114 */
- uint32_t cp_pq_exe_status_hi; /* ordinal115 */
- uint32_t cp_packet_id_lo; /* ordinal116 */
- uint32_t cp_packet_id_hi; /* ordinal117 */
- uint32_t cp_packet_exe_status_lo; /* ordinal118 */
- uint32_t cp_packet_exe_status_hi; /* ordinal119 */
- uint32_t gds_save_base_addr_lo; /* ordinal120 */
- uint32_t gds_save_base_addr_hi; /* ordinal121 */
- uint32_t gds_save_mask_lo; /* ordinal122 */
- uint32_t gds_save_mask_hi; /* ordinal123 */
- uint32_t ctx_save_base_addr_lo; /* ordinal124 */
- uint32_t ctx_save_base_addr_hi; /* ordinal125 */
- uint32_t reserved44; /* ordinal126 */
- uint32_t reserved45; /* ordinal127 */
- uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
- uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
- uint32_t cp_hqd_active; /* ordinal130 */
- uint32_t cp_hqd_vmid; /* ordinal131 */
- uint32_t cp_hqd_persistent_state; /* ordinal132 */
- uint32_t cp_hqd_pipe_priority; /* ordinal133 */
- uint32_t cp_hqd_queue_priority; /* ordinal134 */
- uint32_t cp_hqd_quantum; /* ordinal135 */
- uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
- uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
- uint32_t cp_hqd_pq_rptr; /* ordinal138 */
- uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
- uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
- uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
- uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
- uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
- uint32_t cp_hqd_pq_wptr; /* ordinal144 */
- uint32_t cp_hqd_pq_control; /* ordinal145 */
- uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
- uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
- uint32_t cp_hqd_ib_rptr; /* ordinal148 */
- uint32_t cp_hqd_ib_control; /* ordinal149 */
- uint32_t cp_hqd_iq_timer; /* ordinal150 */
- uint32_t cp_hqd_iq_rptr; /* ordinal151 */
- uint32_t cp_hqd_dequeue_request; /* ordinal152 */
- uint32_t cp_hqd_dma_offload; /* ordinal153 */
- uint32_t cp_hqd_sema_cmd; /* ordinal154 */
- uint32_t cp_hqd_msg_type; /* ordinal155 */
- uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
- uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
- uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
- uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
- uint32_t cp_hqd_hq_status0; /* ordinal160 */
- uint32_t cp_hqd_hq_control0; /* ordinal161 */
- uint32_t cp_mqd_control; /* ordinal162 */
- uint32_t cp_hqd_hq_status1; /* ordinal163 */
- uint32_t cp_hqd_hq_control1; /* ordinal164 */
- uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
- uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
- uint32_t cp_hqd_eop_control; /* ordinal167 */
- uint32_t cp_hqd_eop_rptr; /* ordinal168 */
- uint32_t cp_hqd_eop_wptr; /* ordinal169 */
- uint32_t cp_hqd_eop_done_events; /* ordinal170 */
- uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
- uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
- uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
- uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
- uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
- uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
- uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
- uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
- uint32_t cp_hqd_error; /* ordinal179 */
- uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
- uint32_t cp_hqd_eop_dones; /* ordinal181 */
- uint32_t reserved46; /* ordinal182 */
- uint32_t reserved47; /* ordinal183 */
- uint32_t reserved48; /* ordinal184 */
- uint32_t reserved49; /* ordinal185 */
- uint32_t reserved50; /* ordinal186 */
- uint32_t reserved51; /* ordinal187 */
- uint32_t reserved52; /* ordinal188 */
- uint32_t reserved53; /* ordinal189 */
- uint32_t reserved54; /* ordinal190 */
- uint32_t reserved55; /* ordinal191 */
- uint32_t iqtimer_pkt_header; /* ordinal192 */
- uint32_t iqtimer_pkt_dw0; /* ordinal193 */
- uint32_t iqtimer_pkt_dw1; /* ordinal194 */
- uint32_t iqtimer_pkt_dw2; /* ordinal195 */
- uint32_t iqtimer_pkt_dw3; /* ordinal196 */
- uint32_t iqtimer_pkt_dw4; /* ordinal197 */
- uint32_t iqtimer_pkt_dw5; /* ordinal198 */
- uint32_t iqtimer_pkt_dw6; /* ordinal199 */
- uint32_t iqtimer_pkt_dw7; /* ordinal200 */
- uint32_t iqtimer_pkt_dw8; /* ordinal201 */
- uint32_t iqtimer_pkt_dw9; /* ordinal202 */
- uint32_t iqtimer_pkt_dw10; /* ordinal203 */
- uint32_t iqtimer_pkt_dw11; /* ordinal204 */
- uint32_t iqtimer_pkt_dw12; /* ordinal205 */
- uint32_t iqtimer_pkt_dw13; /* ordinal206 */
- uint32_t iqtimer_pkt_dw14; /* ordinal207 */
- uint32_t iqtimer_pkt_dw15; /* ordinal208 */
- uint32_t iqtimer_pkt_dw16; /* ordinal209 */
- uint32_t iqtimer_pkt_dw17; /* ordinal210 */
- uint32_t iqtimer_pkt_dw18; /* ordinal211 */
- uint32_t iqtimer_pkt_dw19; /* ordinal212 */
- uint32_t iqtimer_pkt_dw20; /* ordinal213 */
- uint32_t iqtimer_pkt_dw21; /* ordinal214 */
- uint32_t iqtimer_pkt_dw22; /* ordinal215 */
- uint32_t iqtimer_pkt_dw23; /* ordinal216 */
- uint32_t iqtimer_pkt_dw24; /* ordinal217 */
- uint32_t iqtimer_pkt_dw25; /* ordinal218 */
- uint32_t iqtimer_pkt_dw26; /* ordinal219 */
- uint32_t iqtimer_pkt_dw27; /* ordinal220 */
- uint32_t iqtimer_pkt_dw28; /* ordinal221 */
- uint32_t iqtimer_pkt_dw29; /* ordinal222 */
- uint32_t iqtimer_pkt_dw30; /* ordinal223 */
- uint32_t iqtimer_pkt_dw31; /* ordinal224 */
- uint32_t reserved56; /* ordinal225 */
- uint32_t reserved57; /* ordinal226 */
- uint32_t reserved58; /* ordinal227 */
- uint32_t set_resources_header; /* ordinal228 */
- uint32_t set_resources_dw1; /* ordinal229 */
- uint32_t set_resources_dw2; /* ordinal230 */
- uint32_t set_resources_dw3; /* ordinal231 */
- uint32_t set_resources_dw4; /* ordinal232 */
- uint32_t set_resources_dw5; /* ordinal233 */
- uint32_t set_resources_dw6; /* ordinal234 */
- uint32_t set_resources_dw7; /* ordinal235 */
- uint32_t reserved59; /* ordinal236 */
- uint32_t reserved60; /* ordinal237 */
- uint32_t reserved61; /* ordinal238 */
- uint32_t reserved62; /* ordinal239 */
- uint32_t reserved63; /* ordinal240 */
- uint32_t reserved64; /* ordinal241 */
- uint32_t reserved65; /* ordinal242 */
- uint32_t reserved66; /* ordinal243 */
- uint32_t reserved67; /* ordinal244 */
- uint32_t reserved68; /* ordinal245 */
- uint32_t reserved69; /* ordinal246 */
- uint32_t reserved70; /* ordinal247 */
- uint32_t reserved71; /* ordinal248 */
- uint32_t reserved72; /* ordinal249 */
- uint32_t reserved73; /* ordinal250 */
- uint32_t reserved74; /* ordinal251 */
- uint32_t reserved75; /* ordinal252 */
- uint32_t reserved76; /* ordinal253 */
- uint32_t reserved77; /* ordinal254 */
- uint32_t reserved78; /* ordinal255 */
-
- uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
-};
-
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
int i, r;
@@ -4662,34 +4518,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
u32 *buf;
struct vi_mqd *mqd;
- /* init the pipes */
- mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
- int me = (i < 4) ? 1 : 2;
- int pipe = (i < 4) ? i : (i - 4);
-
- eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
- eop_gpu_addr >>= 8;
-
- vi_srbm_select(adev, me, pipe, 0, 0);
-
- /* write the EOP addr */
- WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
- WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-
- /* set the VMID assigned */
- WREG32(mmCP_HQD_VMID, 0);
-
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32(mmCP_HQD_EOP_CONTROL);
- tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
- (order_base_2(MEC_HPD_SIZE / 4) - 1));
- WREG32(mmCP_HQD_EOP_CONTROL, tmp);
- }
- vi_srbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-
- /* init the queues. Just two for now. */
+ /* init the queues. */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
@@ -4741,6 +4570,22 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
ring->pipe,
ring->queue, 0);
+ eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
+ eop_gpu_addr >>= 8;
+
+ /* write the EOP addr */
+ WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
+ WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
+
+ /* set the VMID assigned */
+ WREG32(mmCP_HQD_VMID, 0);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32(mmCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MEC_HPD_SIZE / 4) - 1));
+ WREG32(mmCP_HQD_EOP_CONTROL, tmp);
+
/* disable wptr polling */
tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
@@ -4824,9 +4669,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
+ WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->cp_hqd_pq_wptr_poll_addr_hi);
@@ -4836,7 +4681,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
(adev->asic_type == CHIP_FIJI) ||
(adev->asic_type == CHIP_STONEY) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS10)) {
+ (adev->asic_type == CHIP_POLARIS10) ||
+ (adev->asic_type == CHIP_POLARIS12)) {
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
AMDGPU_DOORBELL_KIQ << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
@@ -4872,7 +4718,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
mqd->cp_hqd_persistent_state = tmp;
if (adev->asic_type == CHIP_STONEY ||
adev->asic_type == CHIP_POLARIS11 ||
- adev->asic_type == CHIP_POLARIS10) {
+ adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS12) {
tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
@@ -4980,7 +4827,6 @@ static int gfx_v8_0_hw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfx_v8_0_init_golden_registers(adev);
-
gfx_v8_0_gpu_init(adev);
r = gfx_v8_0_rlc_resume(adev);
@@ -4988,8 +4834,6 @@ static int gfx_v8_0_hw_init(void *handle)
return r;
r = gfx_v8_0_cp_resume(adev);
- if (r)
- return r;
return r;
}
@@ -5000,6 +4844,10 @@ static int gfx_v8_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ if (amdgpu_sriov_vf(adev)) {
+ pr_debug("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
gfx_v8_0_cp_enable(adev, false);
gfx_v8_0_rlc_stop(adev);
gfx_v8_0_cp_compute_fini(adev);
@@ -5037,25 +4885,22 @@ static bool gfx_v8_0_is_idle(void *handle)
static int gfx_v8_0_wait_for_idle(void *handle)
{
unsigned i;
- u32 tmp;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->usec_timeout; i++) {
- /* read MC_STATUS */
- tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
-
- if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ if (gfx_v8_0_is_idle(handle))
return 0;
+
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v8_0_soft_reset(void *handle)
+static bool gfx_v8_0_check_soft_reset(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* GRBM_STATUS */
tmp = RREG32(mmGRBM_STATUS);
@@ -5064,16 +4909,12 @@ static int gfx_v8_0_soft_reset(void *handle)
GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
- GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
+ GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
- }
-
- if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
- grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
- GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
}
@@ -5084,73 +4925,200 @@ static int gfx_v8_0_soft_reset(void *handle)
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
+ REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
+ REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 1);
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
+ SOFT_RESET_GRBM, 1);
+ }
+
/* SRBM_STATUS */
tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
if (grbm_soft_reset || srbm_soft_reset) {
- /* stop the rlc */
- gfx_v8_0_rlc_stop(adev);
+ adev->gfx.grbm_soft_reset = grbm_soft_reset;
+ adev->gfx.srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->gfx.grbm_soft_reset = 0;
+ adev->gfx.srbm_soft_reset = 0;
+ return false;
+ }
+}
+static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ int i;
+
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
+ u32 tmp;
+ tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
+ DEQUEUE_REQ, 2);
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ udelay(1);
+ }
+ }
+}
+
+static int gfx_v8_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+
+ if ((!adev->gfx.grbm_soft_reset) &&
+ (!adev->gfx.srbm_soft_reset))
+ return 0;
+
+ grbm_soft_reset = adev->gfx.grbm_soft_reset;
+ srbm_soft_reset = adev->gfx.srbm_soft_reset;
+
+ /* stop the rlc */
+ gfx_v8_0_rlc_stop(adev);
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
/* Disable GFX parsing/prefetching */
gfx_v8_0_cp_gfx_enable(adev, false);
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
+ int i;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ gfx_v8_0_inactive_hqd(adev, ring);
+ }
/* Disable MEC parsing/prefetching */
gfx_v8_0_cp_compute_enable(adev, false);
+ }
- if (grbm_soft_reset || srbm_soft_reset) {
- tmp = RREG32(mmGMCON_DEBUG);
- tmp = REG_SET_FIELD(tmp,
- GMCON_DEBUG, GFX_STALL, 1);
- tmp = REG_SET_FIELD(tmp,
- GMCON_DEBUG, GFX_CLEAR, 1);
- WREG32(mmGMCON_DEBUG, tmp);
+ return 0;
+}
- udelay(50);
- }
+static int gfx_v8_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+ u32 tmp;
- if (grbm_soft_reset) {
- tmp = RREG32(mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmGRBM_SOFT_RESET);
+ if ((!adev->gfx.grbm_soft_reset) &&
+ (!adev->gfx.srbm_soft_reset))
+ return 0;
- udelay(50);
+ grbm_soft_reset = adev->gfx.grbm_soft_reset;
+ srbm_soft_reset = adev->gfx.srbm_soft_reset;
- tmp &= ~grbm_soft_reset;
- WREG32(mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmGRBM_SOFT_RESET);
- }
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
+ WREG32(mmGMCON_DEBUG, tmp);
+ udelay(50);
+ }
- if (srbm_soft_reset) {
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
+ if (grbm_soft_reset) {
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+ }
- if (grbm_soft_reset || srbm_soft_reset) {
- tmp = RREG32(mmGMCON_DEBUG);
- tmp = REG_SET_FIELD(tmp,
- GMCON_DEBUG, GFX_STALL, 0);
- tmp = REG_SET_FIELD(tmp,
- GMCON_DEBUG, GFX_CLEAR, 0);
- WREG32(mmGMCON_DEBUG, tmp);
- }
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
- /* Wait a little for things to settle down */
udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ }
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
+ WREG32(mmGMCON_DEBUG, tmp);
}
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
+ return 0;
+}
+
+static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+ WREG32(mmCP_HQD_PQ_RPTR, 0);
+ WREG32(mmCP_HQD_PQ_WPTR, 0);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+}
+
+static int gfx_v8_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+
+ if ((!adev->gfx.grbm_soft_reset) &&
+ (!adev->gfx.srbm_soft_reset))
+ return 0;
+
+ grbm_soft_reset = adev->gfx.grbm_soft_reset;
+ srbm_soft_reset = adev->gfx.srbm_soft_reset;
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
+ gfx_v8_0_cp_gfx_resume(adev);
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
+ int i;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ gfx_v8_0_init_hqd(adev, ring);
+ }
+ gfx_v8_0_cp_compute_resume(adev);
+ }
+ gfx_v8_0_rlc_start(adev);
+
return 0;
}
@@ -5222,9 +5190,70 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32(mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32(mmSQ_IND_DATA);
+}
+
+static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* type 0 wave data */
+ dst[(*no_fields)++] = 0;
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+}
+
+static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(
+ adev, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v8_0_select_se_sh,
+ .read_wave_data = &gfx_v8_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
static int gfx_v8_0_early_init(void *handle)
@@ -5269,92 +5298,43 @@ static int gfx_v8_0_late_init(void *handle)
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- uint32_t data, temp;
-
- if (adev->asic_type == CHIP_POLARIS11)
+ if ((adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12))
/* Send msg to SMU via Powerplay */
amdgpu_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_SMC,
enable ?
AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
- temp = data = RREG32(mmRLC_PG_CNTL);
- /* Enable static MGPG */
- if (enable)
- data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
- else
- data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
-
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- uint32_t data, temp;
-
- temp = data = RREG32(mmRLC_PG_CNTL);
- /* Enable dynamic MGPG */
- if (enable)
- data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
- else
- data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
-
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- uint32_t data, temp;
-
- temp = data = RREG32(mmRLC_PG_CNTL);
- /* Enable quick PG */
- if (enable)
- data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
- else
- data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
-
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- u32 data, orig;
-
- orig = data = RREG32(mmRLC_PG_CNTL);
-
- if (enable)
- data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
- else
- data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
-
- if (orig != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
bool enable)
{
- u32 data, orig;
-
- orig = data = RREG32(mmRLC_PG_CNTL);
-
- if (enable)
- data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
- else
- data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
-
- if (orig != data)
- WREG32(mmRLC_PG_CNTL, data);
+ WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
/* Read any GFX register to wake up GFX. */
if (!enable)
- data = RREG32(mmDB_RENDER_CONTROL);
+ RREG32(mmDB_RENDER_CONTROL);
}
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
@@ -5376,14 +5356,11 @@ static int gfx_v8_0_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
- if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
- return 0;
-
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
- if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
- cz_update_gfx_cg_power_gating(adev, enable);
+
+ cz_update_gfx_cg_power_gating(adev, enable);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
@@ -5396,6 +5373,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
break;
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
else
@@ -5430,15 +5408,15 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
data = RREG32(mmRLC_SERDES_WR_CTRL);
if (adev->asic_type == CHIP_STONEY)
- data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
- RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
- RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
- RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
- RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
- RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
- RLC_SERDES_WR_CTRL__POWER_UP_MASK |
- RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
- RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
+ data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
+ RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
+ RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
+ RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
+ RLC_SERDES_WR_CTRL__POWER_UP_MASK |
+ RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
+ RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
else
data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
@@ -5461,10 +5439,10 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
#define MSG_ENTER_RLC_SAFE_MODE 1
#define MSG_EXIT_RLC_SAFE_MODE 0
-
-#define RLC_GPR_REG2__REQ_MASK 0x00000001
-#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
-#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
+#define RLC_GPR_REG2__REQ_MASK 0x00000001
+#define RLC_GPR_REG2__REQ__SHIFT 0
+#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
+#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
@@ -5494,7 +5472,7 @@ static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
}
for (i = 0; i < adev->usec_timeout; i++) {
- if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
+ if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
break;
udelay(1);
}
@@ -5522,7 +5500,7 @@ static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
}
for (i = 0; i < adev->usec_timeout; i++) {
- if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
+ if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
break;
udelay(1);
}
@@ -5554,7 +5532,7 @@ static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
}
for (i = 0; i < adev->usec_timeout; i++) {
- if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
+ if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
@@ -5581,7 +5559,7 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
}
for (i = 0; i < adev->usec_timeout; i++) {
- if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
+ if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
@@ -5622,21 +5600,12 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
- if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
/* 1 - RLC memory Light sleep */
- temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
- data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
- if (temp != data)
- WREG32(mmRLC_MEM_SLP_CNTL, data);
- }
+ WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
- if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
- /* 2 - CP memory Light sleep */
- temp = data = RREG32(mmCP_MEM_SLP_CNTL);
- data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
- if (temp != data)
- WREG32(mmCP_MEM_SLP_CNTL, data);
- }
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
+ WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
}
/* 3 - RLC_CGTT_MGCG_OVERRIDE */
@@ -5735,29 +5704,24 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
adev->gfx.rlc.funcs->enter_safe_mode(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
- /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
- * Cmp_busy/GFX_Idle interrupts
- */
- gfx_v8_0_enable_gui_idle_interrupt(adev, true);
-
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
if (temp1 != data1)
WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
- /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
- /* 3 - clear cgcg override */
+ /* 2 - clear cgcg override */
gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
- /* 4 - write cmd to set CGLS */
+ /* 3 - write cmd to set CGLS */
gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
- /* 5 - enable cgcg */
+ /* 4 - enable cgcg */
data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
@@ -5775,6 +5739,11 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (temp != data)
WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
+ /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
+ * Cmp_busy/GFX_Idle interrupts
+ */
+ gfx_v8_0_enable_gui_idle_interrupt(adev, true);
} else {
/* disable cntx_empty_int_enable & GFX Idle interrupt */
gfx_v8_0_enable_gui_idle_interrupt(adev, false);
@@ -5834,6 +5803,155 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ uint32_t msg_id, pp_state = 0;
+ uint32_t pp_support_state = 0;
+ void *pp_handle = adev->powerplay.pp_handle;
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_CG,
+ pp_support_state,
+ pp_state);
+ amd_set_clockgating_by_smu(pp_handle, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_MG,
+ pp_support_state,
+ pp_state);
+ amd_set_clockgating_by_smu(pp_handle, msg_id);
+ }
+
+ return 0;
+}
+
+static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+
+ uint32_t msg_id, pp_state = 0;
+ uint32_t pp_support_state = 0;
+ void *pp_handle = adev->powerplay.pp_handle;
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_CG,
+ pp_support_state,
+ pp_state);
+ amd_set_clockgating_by_smu(pp_handle, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_3D,
+ pp_support_state,
+ pp_state);
+ amd_set_clockgating_by_smu(pp_handle, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_MG,
+ pp_support_state,
+ pp_state);
+ amd_set_clockgating_by_smu(pp_handle, msg_id);
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_LS;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_RLC,
+ pp_support_state,
+ pp_state);
+ amd_set_clockgating_by_smu(pp_handle, msg_id);
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_LS;
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_CP,
+ pp_support_state,
+ pp_state);
+ amd_set_clockgating_by_smu(pp_handle, msg_id);
+ }
+
+ return 0;
+}
+
static int gfx_v8_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
@@ -5846,33 +5964,33 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
gfx_v8_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
break;
+ case CHIP_TONGA:
+ gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
+ break;
default:
break;
}
return 0;
}
-static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- u32 rptr;
-
- rptr = ring->adev->wb.wb[ring->rptr_offs];
-
- return rptr;
+ return ring->adev->wb.wb[ring->rptr_offs];
}
static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u32 wptr;
if (ring->use_doorbell)
/* XXX check if swapping is necessary on BE */
- wptr = ring->adev->wb.wb[ring->wptr_offs];
+ return ring->adev->wb.wb[ring->wptr_offs];
else
- wptr = RREG32(mmCP_RB0_WPTR);
-
- return wptr;
+ return RREG32(mmCP_RB0_WPTR);
}
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
@@ -5893,7 +6011,7 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
u32 ref_and_mask, reg_mem_engine;
- if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
switch (ring->me) {
case 1:
ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
@@ -5921,6 +6039,18 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x20); /* poll interval */
}
+static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
+ EVENT_INDEX(4));
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
+ EVENT_INDEX(0));
+}
+
+
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5939,12 +6069,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
{
u32 header, control = 0;
- /* insert SWITCH_BUFFER packet before first IB in the ring frame */
- if (ctx_switch) {
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- }
-
if (ib->flags & AMDGPU_IB_FLAG_CE)
header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
else
@@ -5971,9 +6095,9 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
- (2 << 0) |
+ (2 << 0) |
#endif
- (ib->gpu_addr & 0xFFFFFFFC));
+ (ib->gpu_addr & 0xFFFFFFFC));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
amdgpu_ring_write(ring, control);
}
@@ -6001,7 +6125,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
- int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
uint32_t seq = ring->fence_drv.sync_seq;
uint64_t addr = ring->fence_drv.gpu_addr;
@@ -6014,20 +6138,12 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, seq);
amdgpu_ring_write(ring, 0xffffffff);
amdgpu_ring_write(ring, 4); /* poll interval */
-
- if (usepfp) {
- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- }
}
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
- int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -6068,18 +6184,11 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* sync PFP to ME, otherwise we might get invalid PFP reads */
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
+ /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
+ amdgpu_ring_insert_nop(ring, 128);
}
}
-static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
-{
- return ring->adev->wb.wb[ring->rptr_offs];
-}
-
static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->wptr_offs];
@@ -6115,36 +6224,53 @@ static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, upper_32_bits(seq));
}
-static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
- enum amdgpu_interrupt_state state)
+static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
- u32 cp_int_cntl;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+}
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
- cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- TIME_STAMP_INT_ENABLE, 0);
- WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
- cp_int_cntl =
- REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- TIME_STAMP_INT_ENABLE, 1);
- WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
- break;
- default:
- break;
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ gfx_v8_0_ring_emit_vgt_flush(ring);
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
}
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ enum amdgpu_interrupt_state state)
+{
+ WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
int me, int pipe,
enum amdgpu_interrupt_state state)
{
- u32 mec_int_cntl, mec_int_cntl_reg;
-
/*
* amdgpu controls only pipe 0 of MEC1. That's why this function only
* handles the setting of interrupts for this specific pipe. All other
@@ -6154,7 +6280,6 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
if (me == 1) {
switch (pipe) {
case 0:
- mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
break;
default:
DRM_DEBUG("invalid pipe %d\n", pipe);
@@ -6165,22 +6290,8 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
return;
}
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
- mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
- TIME_STAMP_INT_ENABLE, 0);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
- mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
- TIME_STAMP_INT_ENABLE, 1);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
- break;
- default:
- break;
- }
+ WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
@@ -6188,24 +6299,8 @@ static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
unsigned type,
enum amdgpu_interrupt_state state)
{
- u32 cp_int_cntl;
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
- cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE, 0);
- WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
- cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE, 1);
- WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
- break;
- default:
- break;
- }
+ WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
return 0;
}
@@ -6215,24 +6310,8 @@ static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
unsigned type,
enum amdgpu_interrupt_state state)
{
- u32 cp_int_cntl;
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
- cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE, 0);
- WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
- cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE, 1);
- WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
- break;
- default:
- break;
- }
+ WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
return 0;
}
@@ -6326,7 +6405,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
+static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.name = "gfx_v8_0",
.early_init = gfx_v8_0_early_init,
.late_init = gfx_v8_0_late_init,
@@ -6338,16 +6417,31 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.resume = gfx_v8_0_resume,
.is_idle = gfx_v8_0_is_idle,
.wait_for_idle = gfx_v8_0_wait_for_idle,
+ .check_soft_reset = gfx_v8_0_check_soft_reset,
+ .pre_soft_reset = gfx_v8_0_pre_soft_reset,
.soft_reset = gfx_v8_0_soft_reset,
+ .post_soft_reset = gfx_v8_0_post_soft_reset,
.set_clockgating_state = gfx_v8_0_set_clockgating_state,
.set_powergating_state = gfx_v8_0_set_powergating_state,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
- .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
- .parse_cs = NULL,
+ .emit_frame_size =
+ 20 + /* gfx_v8_0_ring_emit_gds_switch */
+ 7 + /* gfx_v8_0_ring_emit_hdp_flush */
+ 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+ 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
+ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+ 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
+ 2 + /* gfx_v8_ring_emit_sb */
+ 3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
+ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
@@ -6359,13 +6453,25 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v8_ring_emit_sb,
+ .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
- .get_rptr = gfx_v8_0_ring_get_rptr_compute,
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .get_rptr = gfx_v8_0_ring_get_rptr,
.get_wptr = gfx_v8_0_ring_get_wptr_compute,
.set_wptr = gfx_v8_0_ring_set_wptr_compute,
- .parse_cs = NULL,
+ .emit_frame_size =
+ 20 + /* gfx_v8_0_ring_emit_gds_switch */
+ 7 + /* gfx_v8_0_ring_emit_hdp_flush */
+ 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+ 17 + /* gfx_v8_0_ring_emit_vm_flush */
+ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
@@ -6479,15 +6585,12 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
u32 data, mask;
- data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
- data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
-
- data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
- data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
+ RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
- return (~data) & mask;
+ return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
@@ -6532,3 +6635,21 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
}
+
+const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 8,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 8,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &gfx_v8_0_ip_funcs,
+};