diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 163 |
1 files changed, 82 insertions, 81 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 515eb50cd0f8..11e924dd88ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -28,27 +28,26 @@ #include "rsmu/rsmu_0_0_2_sh_mask.h" #include "umc/umc_6_1_1_offset.h" #include "umc/umc_6_1_1_sh_mask.h" +#include "umc/umc_6_1_2_offset.h" #define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 -/* UMC 6_1_2 register offsets */ -#define mmUMCCH0_0_EccErrCntSel_ARCT 0x0360 -#define mmUMCCH0_0_EccErrCntSel_ARCT_BASE_IDX 1 -#define mmUMCCH0_0_EccErrCnt_ARCT 0x0361 -#define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX 1 -#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT 0x03c2 -#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX 1 +#define UMC_6_INST_DIST 0x40000 /* * (addr / 256) * 8192, the higher 26 bits in ErrorAddr * is the index of 8KB block */ -#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) /* channel index is the index of 256B block */ #define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) /* offset in 256B block */ #define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) +#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++) +#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) +#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) + const uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { {2, 18, 11, 27}, {4, 20, 13, 29}, @@ -57,41 +56,17 @@ const uint32_t {9, 25, 0, 16}, {15, 31, 6, 22} }; -static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, - uint32_t umc_instance) -{ - uint32_t rsmu_umc_index; - - rsmu_umc_index = RREG32_SOC15(RSMU, 0, - mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_MODE_EN, 1); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_INSTANCE, umc_instance); - rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_WREN, 1 << umc_instance); - WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - rsmu_umc_index); -} - static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) { WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN, 0); } -static uint32_t umc_v6_1_get_umc_inst(struct amdgpu_device *adev) +static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) { - uint32_t rsmu_umc_index; - - rsmu_umc_index = RREG32_SOC15(RSMU, 0, - mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); - return REG_GET_FIELD(rsmu_umc_index, - RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, - RSMU_UMC_INDEX_INSTANCE); + return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst; } static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, @@ -105,7 +80,6 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, if (adev->asic_type == CHIP_ARCTURUS) { /* UMC 6_1_2 registers */ - ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); ecc_err_cnt_addr = @@ -114,7 +88,6 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); } else { /* UMC 6_1_1 registers */ - ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); ecc_err_cnt_addr = @@ -124,31 +97,31 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, } /* select the lower chip and check the error count */ - ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 0); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); - ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); *error_count += (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - UMC_V6_1_CE_CNT_INIT); /* clear the lower chip err count */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* select the higher chip and check the err counter */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 1); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); - ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); *error_count += (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - UMC_V6_1_CE_CNT_INIT); /* clear the higher chip err count */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ - mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) @@ -164,18 +137,16 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev if (adev->asic_type == CHIP_ARCTURUS) { /* UMC 6_1_2 registers */ - mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); } else { /* UMC 6_1_1 registers */ - mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); } /* check the MCUMC_STATUS */ - mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -185,38 +156,46 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev *error_count += 1; } -static void umc_v6_1_query_error_count(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint32_t umc_reg_offset, - uint32_t channel_index) -{ - umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, - &(err_data->ce_count)); - umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, - &(err_data->ue_count)); -} - static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - amdgpu_umc_for_each_channel(umc_v6_1_query_error_count); + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t channel_index) + uint32_t umc_reg_offset, + uint32_t ch_inst, + uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; uint64_t mc_umc_status, err_addr, retired_page; struct eeprom_table_record *err_rec; + uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; if (adev->asic_type == CHIP_ARCTURUS) { /* UMC 6_1_2 registers */ - mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); } else { /* UMC 6_1_1 registers */ - mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); } @@ -224,12 +203,12 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* skip error address process if -ENOMEM */ if (!err_data->err_addr) { /* clear umc status */ - WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); return; } err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -257,39 +236,53 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; err_rec->cu = 0; err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_v6_1_get_umc_inst(adev); + err_rec->mcumc_id = umc_inst; err_data->err_addr_cnt++; } } /* clear umc status */ - WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); } static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); + struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_query_error_address(adev, + err_data, + umc_reg_offset, + ch_inst, + umc_inst); + } + } static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t channel_index) + uint32_t umc_reg_offset) { uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; uint32_t ecc_err_cnt_addr; if (adev->asic_type == CHIP_ARCTURUS) { /* UMC 6_1_2 registers */ - ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); ecc_err_cnt_addr = SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT); } else { /* UMC 6_1_1 registers */ - ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); ecc_err_cnt_addr = @@ -297,28 +290,38 @@ static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, } /* select the lower chip and check the error count */ - ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 0); /* set ce error interrupt type to APIC based interrupt */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrInt, 0x1); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); /* set error count to initial value */ - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* select the higher chip and check the err counter */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 1); - WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); - WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); } static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) { - void *ras_error_status = NULL; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + umc_v6_1_disable_umc_index_mode(adev); - amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel); + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); + } } const struct amdgpu_umc_funcs umc_v6_1_funcs = { @@ -326,6 +329,4 @@ const struct amdgpu_umc_funcs umc_v6_1_funcs = { .ras_late_init = amdgpu_umc_ras_late_init, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, - .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, - .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, }; |