Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
90 files changed, 6866 insertions, 1267 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 56e084367b93..8afa0bceb460 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -66,7 +66,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
 amdgpu-y += \
 	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
-	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o
+	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
+	arct_reg_init.o navi12_reg_init.o

 # add DF block
 amdgpu-y += \
@@ -77,9 +78,13 @@ amdgpu-y += \
 amdgpu-y += \
 	gmc_v7_0.o \
 	gmc_v8_0.o \
-	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \
+	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
 	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o

+# add UMC block
+amdgpu-y += \
+	umc_v6_1.o
+
 # add IH block
 amdgpu-y += \
 	amdgpu_irq.o \
@@ -144,7 +149,8 @@ amdgpu-y += \
 amdgpu-y += \
 	amdgpu_vcn.o \
 	vcn_v1_0.o \
-	vcn_v2_0.o
+	vcn_v2_0.o \
+	vcn_v2_5.o

 # add ATHUB block
 amdgpu-y += \
@@ -162,6 +168,7 @@ amdgpu-y += \
 	amdgpu_amdkfd_gpuvm.o \
 	amdgpu_amdkfd_gfx_v8.o \
 	amdgpu_amdkfd_gfx_v9.o \
+	amdgpu_amdkfd_arcturus.o \
 	amdgpu_amdkfd_gfx_v10.o

 ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8199d201b43a..f85e7174babb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -86,6 +86,7 @@
 #include "amdgpu_smu.h"
 #include "amdgpu_discovery.h"
 #include "amdgpu_mes.h"
+#include "amdgpu_umc.h"

 #define MAX_GPU_INSTANCE	16

@@ -532,6 +533,14 @@ struct amdgpu_allowed_register_entry {
 	bool grbm_indexed;
 };

+enum amd_reset_method {
+	AMD_RESET_METHOD_LEGACY = 0,
+	AMD_RESET_METHOD_MODE0,
+	AMD_RESET_METHOD_MODE1,
+	AMD_RESET_METHOD_MODE2,
+	AMD_RESET_METHOD_BACO
+};
+
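The enum above pairs with the new reset_method callback and the amdgpu_asic_reset_method() macro added further down in this header, which generic code uses (for example when picking an mp1_state in amdgpu_device.c later in this series). A rough sketch of the intended usage; the my_asic_* names here are hypothetical, not part of this patch:

static enum amd_reset_method my_asic_reset_method(struct amdgpu_device *adev)
{
	/* Report the cheapest reset this ASIC supports: BACO when
	 * available, otherwise a full mode1 reset. The helper
	 * my_asic_supports_baco() is assumed for illustration. */
	if (my_asic_supports_baco(adev))
		return AMD_RESET_METHOD_BACO;

	return AMD_RESET_METHOD_MODE1;
}

static int my_asic_reset(struct amdgpu_device *adev)
{
	/* Callers dispatch through the macro added in this header,
	 * so the decision lives in one place per ASIC. */
	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
		return my_asic_baco_reset(adev);

	return my_asic_mode1_reset(adev);
}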
/*
 * ASIC specific functions.
 */
@@ -543,6 +552,7 @@ struct amdgpu_asic_funcs {
 			     u32 sh_num, u32 reg_offset, u32 *value);
 	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
 	int (*reset)(struct amdgpu_device *adev);
+	enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
 	/* get the reference clock */
 	u32 (*get_xclk)(struct amdgpu_device *adev);
 	/* MM block clocks */
@@ -627,6 +637,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
 typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
 typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
+typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
@@ -648,6 +661,12 @@ struct nbio_hdp_flush_reg {
 	u32 ref_and_mask_cp9;
 	u32 ref_and_mask_sdma0;
 	u32 ref_and_mask_sdma1;
+	u32 ref_and_mask_sdma2;
+	u32 ref_and_mask_sdma3;
+	u32 ref_and_mask_sdma4;
+	u32 ref_and_mask_sdma5;
+	u32 ref_and_mask_sdma6;
+	u32 ref_and_mask_sdma7;
 };

 struct amdgpu_mmio_remap {
@@ -668,7 +687,7 @@ struct amdgpu_nbio_funcs {
 	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
 			bool use_doorbell, int doorbell_index, int doorbell_size);
 	void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
-			int doorbell_index);
+			int doorbell_index, int instance);
 	void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
 					 bool enable);
 	void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
@@ -705,6 +724,9 @@ struct amdgpu_df_funcs {
 					 int is_disable);
 	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
 					 uint64_t *count);
+	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
+	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
+			 uint32_t ficadl_val, uint32_t ficadh_val);
 };
 /* Define the HW IP blocks will be used in driver , add more if necessary */
 enum amd_hw_ip_block_type {
@@ -712,6 +734,12 @@ enum amd_hw_ip_block_type {
 	HDP_HWIP,
 	SDMA0_HWIP,
 	SDMA1_HWIP,
+	SDMA2_HWIP,
+	SDMA3_HWIP,
+	SDMA4_HWIP,
+	SDMA5_HWIP,
+	SDMA6_HWIP,
+	SDMA7_HWIP,
 	MMHUB_HWIP,
 	ATHUB_HWIP,
 	NBIO_HWIP,
@@ -728,10 +756,12 @@ enum amd_hw_ip_block_type {
 	NBIF_HWIP,
 	THM_HWIP,
 	CLK_HWIP,
+	UMC_HWIP,
+	RSMU_HWIP,
 	MAX_HWIP
 };

-#define HWIP_MAX_INSTANCE	6
+#define HWIP_MAX_INSTANCE	8

 struct amd_powerplay {
 	void *pp_handle;
@@ -803,6 +833,8 @@ struct amdgpu_device {
 	amdgpu_wreg_t			pcie_wreg;
 	amdgpu_rreg_t			pciep_rreg;
 	amdgpu_wreg_t			pciep_wreg;
+	amdgpu_rreg64_t			pcie_rreg64;
+	amdgpu_wreg64_t			pcie_wreg64;
 	/* protects concurrent UVD register access */
 	spinlock_t uvd_ctx_idx_lock;
 	amdgpu_rreg_t			uvd_ctx_rreg;
@@ -836,6 +868,7 @@ struct amdgpu_device {
 	dma_addr_t			dummy_page_addr;
 	struct amdgpu_vm_manager	vm_manager;
 	struct amdgpu_vmhub		vmhub[AMDGPU_MAX_VMHUBS];
+	unsigned			num_vmhubs;

 	/* memory management */
 	struct amdgpu_mman		mman;
@@ -915,6 +948,9 @@ struct amdgpu_device {
 	/* KFD */
 	struct amdgpu_kfd_dev		kfd;

+	/* UMC */
+	struct amdgpu_umc		umc;
+
 	/* display related functionality */
 	struct amdgpu_display_manager dm;

@@ -965,6 +1001,7 @@ struct amdgpu_device {
 	/* record last mm index being written through WREG32*/
 	unsigned long last_mm_index;
 	bool                            in_gpu_reset;
+	enum pp_mp1_state               mp1_state;
 	struct mutex  lock_reset;
 	struct amdgpu_doorbell_index doorbell_index;

@@ -1033,6 +1070,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) #define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v)) +#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) +#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v)) #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg)) #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v)) #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg)) @@ -1093,6 +1132,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state)) #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev)) +#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev)) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 9fa4f25a3745..07eb29885372 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -87,7 +87,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_RAVEN: kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); break; + case CHIP_ARCTURUS: + kfd2kgd = amdgpu_amdkfd_arcturus_get_functions(); + break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions(); break; default: @@ -651,8 +656,12 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) + if (is_support_sw_smu(adev)) + smu_switch_power_profile(&adev->smu, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); @@ -715,6 +724,11 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) return NULL; } +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) +{ + return NULL; +} + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) { return NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b6076d19e442..e519df3fd2b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -140,6 +140,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c new file mode 100644 index 000000000000..c79aaebeeaf0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -0,0 +1,323 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#undef pr_fmt +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/mmu_context.h> +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "sdma0/sdma0_4_2_2_offset.h" +#include "sdma0/sdma0_4_2_2_sh_mask.h" +#include "sdma1/sdma1_4_2_2_offset.h" +#include "sdma1/sdma1_4_2_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_amdkfd_gfx_v9.h" + +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[8] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, + u32 instance, u32 offset) +{ + switch 
(instance) { + case 0: + return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); + case 1: + return (adev->reg_offset[SDMA1_HWIP][0][1] + offset); + case 2: + return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); + case 3: + return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); + case 4: + return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); + case 5: + return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); + case 6: + return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); + case 7: + return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); + default: + break; + } + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev, + m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = 
mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_rb_cntl;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+
+	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+		return true;
+
+	return false;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int utimeout)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr;
+	uint32_t temp;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+
+	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+	while (true) {
+		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr_hi =
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+	return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+	.init_interrupts = kgd_gfx_v9_init_interrupts,
+	.hqd_load = kgd_gfx_v9_hqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_gfx_v9_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
+	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
+	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
+	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
+	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_pasid =
+			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
+	.get_atc_vmid_pasid_mapping_valid =
+			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
+	.set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
+	.get_tile_config = kgd_gfx_v9_get_tile_config,
+	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
+	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
+	.get_hive_id = amdgpu_amdkfd_get_hive_id,
+};
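Both kgd_hqd_sdma_load() and kgd_hqd_sdma_destroy() above poll SDMA0_RLC0_CONTEXT_STATUS with the same bounded-wait idiom. As a sketch, that pattern factored into a helper (the helper itself is hypothetical and not part of this patch):

static int kgd_wait_sdma_idle(struct amdgpu_device *adev,
			      uint32_t sdma_base_addr,
			      unsigned long end_jiffies)
{
	uint32_t data;

	/* Poll the per-queue context status until the engine reports
	 * idle, giving up once the caller's deadline has passed. */
	while (true) {
		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			return 0;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}
}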
(struct kfd2kgd_calls *)&kfd2kgd; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 0723f800e815..7c03a7fcd011 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -27,7 +27,6 @@ #include <linux/uaccess.h> #include <linux/firmware.h> #include <linux/mmu_context.h> -#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 85395f2d83a6..9d153cf39581 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -47,6 +47,7 @@ #include "soc15d.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_0.h" +#include "gmc_v9_0.h" #define V9_PIPE_PER_MEC (4) @@ -58,66 +59,11 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ -static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -135,39 +81,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = amdgpu_amdkfd_get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .get_hive_id = amdgpu_amdkfd_get_hive_id, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -215,7 +128,7 @@ static void release_queue(struct kgd_dev *kgd) unlock_srbm(kgd); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, @@ -232,7 +145,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, unlock_srbm(kgd); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -293,7 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, * but still works */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; @@ -343,7 +256,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm) @@ -438,7 +351,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { @@ -575,7 +488,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, +bool 
kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -616,7 +529,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) @@ -704,7 +617,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) { uint32_t reg; @@ -715,7 +628,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; } -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid) { uint32_t reg; @@ -754,7 +667,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, return 0; } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid; @@ -773,8 +686,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid) { amdgpu_gmc_flush_gpu_tlb(adev, vmid, flush_type); @@ -786,7 +699,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) return 0; } -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -814,12 +727,12 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) return 0; } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -828,7 +741,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd, return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, uint32_t gfx_index_val, uint32_t sq_cmd) { @@ -853,14 +766,14 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_scratch_backing_va(struct kgd_dev *kgd, +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid) { /* No longer needed on GFXv9. 
The scratch base address is @@ -869,7 +782,7 @@ static void set_scratch_backing_va(struct kgd_dev *kgd, */ } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -884,7 +797,45 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, * now, all processes share the same address space size, like * on GFX8 and older. */ - mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + if (adev->asic_type == CHIP_ARCTURUS) { + /* Two MMHUBs */ + mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); + mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); + } else + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } + +static const struct kfd2kgd_calls kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_gfx_v9_address_watch_disable, + .address_watch_execute = kgd_gfx_v9_address_watch_execute, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, + .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, + .get_tile_config = kgd_gfx_v9_get_tile_config, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, + .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h new file mode 100644 index 000000000000..26d8879bff9d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -0,0 +1,69 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + + + +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base); +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 9410ffceee15..e0c47ae52fc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1090,7 +1090,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( */ if (flags & ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index f539a2a92774..ec311de86fba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   struct amdgpu_ctx *ctx)
 {
 	unsigned num_entities = amdgput_ctx_total_num_entities();
-	unsigned i, j;
+	unsigned i, j, k;
 	int r;

 	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -123,7 +123,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
 		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
-		unsigned num_rings;
+		unsigned num_rings = 0;
 		unsigned num_rqs = 0;

 		switch (i) {
@@ -154,16 +154,26 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			num_rings = 1;
 			break;
 		case AMDGPU_HW_IP_VCN_DEC:
-			rings[0] = &adev->vcn.ring_dec;
-			num_rings = 1;
+			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+				if (adev->vcn.harvest_config & (1 << j))
+					continue;
+				rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
+			}
 			break;
 		case AMDGPU_HW_IP_VCN_ENC:
-			rings[0] = &adev->vcn.ring_enc[0];
-			num_rings = 1;
+			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+				if (adev->vcn.harvest_config & (1 << j))
+					continue;
+				for (k = 0; k < adev->vcn.num_enc_rings; ++k)
+					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
+			}
 			break;
 		case AMDGPU_HW_IP_VCN_JPEG:
-			rings[0] = &adev->vcn.ring_jpeg;
-			num_rings = 1;
+			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+				if (adev->vcn.harvest_config & (1 << j))
+					continue;
+				rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg;
+			}
 			break;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a7f893cf724..682833f90fdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -70,7 +70,10 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

 #define AMDGPU_RESUME_MS		2000

@@ -98,7 +101,10 @@ static const char *amdgpu_asic_name[] = {
 	"VEGA12",
 	"VEGA20",
 	"RAVEN",
+	"ARCTURUS",
 	"NAVI10",
+	"NAVI14",
+	"NAVI12",
 	"LAST",
 };

@@ -413,6 +419,40 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
 }

 /**
+ * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
+ *
+ * @adev: amdgpu device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
+{
+	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
+	BUG();
+	return 0;
+}
+
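These dummies are the defaults for the new pcie_rreg64/pcie_wreg64 hooks wired up later in amdgpu_device_init(). For context, a real implementation on SOC15-class parts typically splits the access into two 32-bit index/data transactions under the PCIE index lock; a rough sketch only, with the NBIO offset helpers assumed rather than taken from this patch:

static uint64_t my_soc_pcie_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	unsigned long flags;
	uint32_t address, data;
	uint64_t r;

	/* Index/data register offsets come from the NBIO block;
	 * these helper names are assumed for illustration. */
	address = adev->nbio_funcs->get_pcie_index_offset(adev);
	data = adev->nbio_funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	/* read the low 32 bits */
	WREG32(address, reg);
	(void)RREG32(address);
	r = RREG32(data);
	/* read the high 32 bits */
	WREG32(address, reg + 4);
	(void)RREG32(address);
	r |= ((uint64_t)RREG32(data) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}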
+/**
+ * amdgpu_invalid_wreg64 - dummy reg write function
+ *
+ * @adev: amdgpu device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register write function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
+{
+	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+		  reg, v);
+	BUG();
+}
+
 /**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
@@ -1384,9 +1424,18 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 		else
 			chip_name = "raven";
 		break;
+	case CHIP_ARCTURUS:
+		chip_name = "arcturus";
+		break;
 	case CHIP_NAVI10:
 		chip_name = "navi10";
 		break;
+	case CHIP_NAVI14:
+		chip_name = "navi14";
+		break;
+	case CHIP_NAVI12:
+		chip_name = "navi12";
+		break;
 	}

 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@@ -1529,6 +1578,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
 	case CHIP_RAVEN:
+	case CHIP_ARCTURUS:
 		if (adev->asic_type == CHIP_RAVEN)
 			adev->family = AMDGPU_FAMILY_RV;
 		else
@@ -1539,6 +1589,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 			return r;
 		break;
 	case CHIP_NAVI10:
+	case CHIP_NAVI14:
+	case CHIP_NAVI12:
 		adev->family = AMDGPU_FAMILY_NV;

 		r = nv_set_ip_blocks(adev);
@@ -1560,9 +1612,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		if (r)
 			return -EAGAIN;
-
-		/* query the reg access mode at the very beginning */
-		amdgpu_virt_init_reg_access_mode(adev);
 	}

 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
@@ -1665,28 +1714,34 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)

 	if (adev->asic_type >= CHIP_VEGA10) {
 		for (i = 0; i < adev->num_ip_blocks; i++) {
-			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
-				if (adev->in_gpu_reset || adev->in_suspend) {
-					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
-						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
-					r = adev->ip_blocks[i].version->funcs->resume(adev);
-					if (r) {
-						DRM_ERROR("resume of IP block <%s> failed %d\n",
+			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
+				continue;
+
+			/* no need to do the fw loading again if already done */
+			if (adev->ip_blocks[i].status.hw == true)
+				break;
+
+			if (adev->in_gpu_reset || adev->in_suspend) {
+				r = adev->ip_blocks[i].version->funcs->resume(adev);
+				if (r) {
+					DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
-						return r;
-					}
-				} else {
-					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
-					if (r) {
-						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
-							  adev->ip_blocks[i].version->funcs->name, r);
-						return r;
-					}
+					return r;
+				}
+			} else {
+				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+				if (r) {
+					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
+					return r;
 				}
-				adev->ip_blocks[i].status.hw = true;
 			}
+
+			adev->ip_blocks[i].status.hw = true;
+			break;
 		}
 	}
+
 	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

 	return r;
@@ -2128,7 +2183,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 		if (r) {
 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
+			return r;
 		}
+		adev->ip_blocks[i].status.hw = false;
 	}
 }
@@ -2163,6 +2220,25 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
 		}
+		adev->ip_blocks[i].status.hw = false;
+		/* handle putting the SMC in the appropriate state */
+		if
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + if (is_support_sw_smu(adev)) { + /* todo */ + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + r = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + adev->mp1_state); + if (r) { + DRM_ERROR("SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); + return r; + } + } + } + + adev->ip_blocks[i].status.hw = false; } return 0; @@ -2215,6 +2291,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) for (j = 0; j < adev->num_ip_blocks; j++) { block = &adev->ip_blocks[j]; + block->status.hw = false; if (block->version->type != ip_order[i] || !block->status.valid) continue; @@ -2223,6 +2300,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2250,13 +2328,15 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) block = &adev->ip_blocks[j]; if (block->version->type != ip_order[i] || - !block->status.valid) + !block->status.valid || + block->status.hw) continue; r = block->version->funcs->hw_init(adev); DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2280,17 +2360,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } } @@ -2315,7 +2397,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || @@ -2328,6 +2410,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } return 0; @@ -2426,6 +2509,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif #if defined(CONFIG_DRM_AMD_DC_DCN2_0) case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: #endif return amdgpu_dc != 0; #endif @@ -2509,6 +2594,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->pcie_wreg = &amdgpu_invalid_wreg; adev->pciep_rreg = &amdgpu_invalid_rreg; adev->pciep_wreg = &amdgpu_invalid_wreg; + adev->pcie_rreg64 = &amdgpu_invalid_rreg64; + adev->pcie_wreg64 = &amdgpu_invalid_wreg64; adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; adev->didt_rreg = &amdgpu_invalid_rreg; @@ -3627,6 +3714,17 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) atomic_inc(&adev->gpu_reset_counter); adev->in_gpu_reset = 1; + switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_MODE1: + 
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
+		break;
+	case AMD_RESET_METHOD_MODE2:
+		adev->mp1_state = PP_MP1_STATE_RESET;
+		break;
+	default:
+		adev->mp1_state = PP_MP1_STATE_NONE;
+		break;
+	}
 	/* Block kfd: SRIOV would do it separately */
 	if (!amdgpu_sriov_vf(adev))
 		amdgpu_amdkfd_pre_reset(adev);
@@ -3640,6 +3738,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 	if (!amdgpu_sriov_vf(adev))
 		amdgpu_amdkfd_post_reset(adev);
 	amdgpu_vf_error_trans_all(adev);
+	adev->mp1_state = PP_MP1_STATE_NONE;
 	adev->in_gpu_reset = 0;
 	mutex_unlock(&adev->lock_reset);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b5d020e15c35..f453e277ed24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -191,7 +191,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
 	}

 	if (!adev->enable_virtual_display) {
-		r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
+		r = amdgpu_bo_pin(new_abo,
+				  amdgpu_display_supported_domains(adev, new_abo->flags));
 		if (unlikely(r != 0)) {
 			DRM_ERROR("failed to pin new abo buffer before flip\n");
 			goto unreserve;
@@ -495,13 +496,25 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
 	.create_handle = drm_gem_fb_create_handle,
 };

-uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+					  uint64_t bo_flags)
 {
 	uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;

 #if defined(CONFIG_DRM_AMD_DC)
-	if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
-	    adev->flags & AMD_IS_APU &&
+	/*
+	 * if amdgpu_bo_support_uswc returns false it means that USWC mappings
+	 * are not supported for this board. But this mapping is required
+	 * to avoid hang caused by placement of scanout BO in GTT on certain
+	 * APUs. So force the BO placement to VRAM in case this architecture
+	 * will not allow USWC mappings.
+	 * Also, don't allow GTT domain if the BO doesn't have the USWC flag set.
+	 */
+	if (adev->asic_type >= CHIP_CARRIZO &&
+	    adev->asic_type <= CHIP_RAVEN &&
+	    (adev->flags & AMD_IS_APU) &&
+	    (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
+	    amdgpu_bo_support_uswc(bo_flags) &&
 	    amdgpu_device_asic_has_dc_support(adev->asic_type))
 		domain |= AMDGPU_GEM_DOMAIN_GTT;
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 06b922fe0d42..3620b24785e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -38,7 +38,8 @@ int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *filp);
 void amdgpu_display_update_priority(struct amdgpu_device *adev);
-uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+					  uint64_t bo_flags);
 struct drm_framebuffer *
 amdgpu_display_user_framebuffer_create(struct drm_device *dev,
				       struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index b88e27da7c28..f0db7ddcb61b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -285,7 +285,7 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_operation_ctx ctx = { true, false };
-	u32 domain = amdgpu_display_supported_domains(adev);
+	u32 domain = amdgpu_display_supported_domains(adev, bo->flags);
 	int ret;
 	bool reads = (direction == DMA_BIDIRECTIONAL ||
		      direction == DMA_FROM_DEVICE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 790263dcc064..3fa18003d4d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -130,13 +130,18 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
 	AMDGPU_VEGA20_DOORBELL_IH                    = 0x178,
 	/* MMSCH: 392~407
 	 * overlap the doorbell assignment with VCN as they are mutually exclusive
-	 * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
+	 * VCN engine's doorbell is 32 bit and two VCN rings share one QWORD
 	 */
-	AMDGPU_VEGA20_DOORBELL64_VCN0_1              = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
+	AMDGPU_VEGA20_DOORBELL64_VCN0_1              = 0x188, /* VCN0 */
 	AMDGPU_VEGA20_DOORBELL64_VCN2_3              = 0x189,
 	AMDGPU_VEGA20_DOORBELL64_VCN4_5              = 0x18A,
 	AMDGPU_VEGA20_DOORBELL64_VCN6_7              = 0x18B,
+	AMDGPU_VEGA20_DOORBELL64_VCN8_9              = 0x18C, /* VCN1 */
+	AMDGPU_VEGA20_DOORBELL64_VCNa_b              = 0x18D,
+	AMDGPU_VEGA20_DOORBELL64_VCNc_d              = 0x18E,
+	AMDGPU_VEGA20_DOORBELL64_VCNe_f              = 0x18F,
+
 	AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1         = 0x188,
 	AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3         = 0x189,
 	AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5         = 0x18A,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6d24b422e0ff..e9046922fe94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -996,6 +996,10 @@ static const struct pci_device_id pciidlist[] = {
 	/* Raven */
 	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
 	{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+	/* Arcturus */
+	{0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x738E, PCI_ANY_ID,
PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, /* Navi10 */ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, @@ -1092,21 +1096,21 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) * unfortunately we can't detect certain * hypervisors so just do this all the time. */ + adev->mp1_state = PP_MP1_STATE_UNLOAD; amdgpu_device_ip_suspend(adev); + adev->mp1_state = PP_MP1_STATE_NONE; } static int amdgpu_pmops_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, true, true); } static int amdgpu_pmops_resume(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); /* GPU comes up enabled by the bios on resume */ if (amdgpu_device_is_px(drm_dev)) { @@ -1120,33 +1124,29 @@ static int amdgpu_pmops_resume(struct device *dev) static int amdgpu_pmops_freeze(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, false, true); } static int amdgpu_pmops_thaw(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_resume(drm_dev, false, true); } static int amdgpu_pmops_poweroff(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, true, true); } static int amdgpu_pmops_restore(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_resume(drm_dev, false, true); } @@ -1205,8 +1205,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) static int amdgpu_pmops_runtime_idle(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); struct drm_crtc *crtc; if (!amdgpu_device_is_px(drm_dev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index eb3569b46c1e..143753d237e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -131,6 +131,10 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, int aligned_size, size; int height = mode_cmd->height; u32 cpp; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; info = drm_get_format_info(adev->ddev, mode_cmd); cpp = info->cpp[0]; @@ -138,15 +142,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); - domain = amdgpu_display_supported_domains(adev); - + domain = amdgpu_display_supported_domains(adev, flags); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, 
domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, ttm_bo_type_kernel, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); @@ -168,7 +168,6 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, dev_err(adev->dev, "FB failed to set tiling flags\n"); } - ret = amdgpu_bo_pin(abo, domain); if (ret) { amdgpu_bo_unreserve(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index bff9173a1a94..d532e3d647ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -747,7 +747,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct amdgpu_device *adev = dev->dev_private; struct drm_gem_object *gobj; uint32_t handle; - u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; u32 domain; int r; @@ -764,7 +765,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_pin_domain(adev, - amdgpu_display_supported_domains(adev)); + amdgpu_display_supported_domains(adev, flags)); r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, ttm_bo_type_device, NULL, &gobj); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 74066e1466f7..f9bef3154b99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -389,7 +389,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { /* create MQD for each KGQ */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; @@ -437,7 +437,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) struct amdgpu_ring *ring = NULL; int i; - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; kfree(adev->gfx.me.mqd_backup[i]); @@ -456,7 +456,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1199b5828b90..554a59b3c4a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -196,6 +196,8 @@ struct amdgpu_gfx_funcs { uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); + int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); }; struct amdgpu_ngg_buf { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 57b3d8a9bef3..529065b83885 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -368,7 +368,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, * are broken on Navi10 and Navi14. */ if (needs_flush && (adev->asic_type < CHIP_VEGA10 || - adev->asic_type == CHIP_NAVI10)) + adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14)) continue; /* Good, we can use this VMID. Remember this submission as diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 252f71e90953..0e2ec608530b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -408,23 +408,38 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_dec.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (adev->vcn.inst[i].ring_dec.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; - for (i = 0; i < adev->vcn.num_enc_rings; i++) - if (adev->vcn.ring_enc[i].sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + for (j = 0; j < adev->vcn.num_enc_rings; j++) + if (adev->vcn.inst[i].ring_enc[j].sched.ready) + ++num_rings; + } ib_start_alignment = 64; ib_size_alignment = 1; break; case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_jpeg.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (adev->vcn.inst[i].ring_jpeg.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8ae44d383a13..2d07f16f1789 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -80,9 +80,6 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) if (bo->pin_count > 0) amdgpu_bo_subtract_pin_size(bo); - if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_memory_limit(bo); - amdgpu_bo_kunmap(bo); if (bo->tbo.base.import_attach) @@ -413,6 +410,40 @@ fail: return false; } +bool amdgpu_bo_support_uswc(u64 bo_flags) +{ + +#ifdef CONFIG_X86_32 + /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit + * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 + */ + return false; +#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) + /* Don't try to enable write-combining when it can't work, or things + * may be slow + * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 + */ + -#ifndef CONFIG_COMPILE_TEST +#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ - thanks to write-combining +#endif + + if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " + "better performance thanks to write-combining\n"); + return false; +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + return false; + + return true; +#endif +} + static int amdgpu_bo_do_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) @@ -466,33 +497,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->flags =
bp->flags; -#ifdef CONFIG_X86_32 - /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit - * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 - */ - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) - /* Don't try to enable write-combining when it can't work, or things - * may be slow - * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 - */ - -#ifndef CONFIG_COMPILE_TEST -#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ - thanks to write-combining -#endif - - if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) - DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " - "better performance thanks to write-combining\n"); - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#else - /* For architectures that don't support WC memory, - * mask out the WC flag from the BO - */ - if (!drm_arch_can_wc_memory()) + if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#endif bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | @@ -1212,6 +1218,42 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, } /** + * amdgpu_bo_release_notify - notification about a BO being released + * @bo: pointer to a buffer object + * + * Wipes VRAM buffers whose contents should not be leaked before the + * memory is released. + */ +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) +{ + struct dma_fence *fence = NULL; + struct amdgpu_bo *abo; + int r; + + if (!amdgpu_bo_is_amdgpu_bo(bo)) + return; + + abo = ttm_to_amdgpu_bo(bo); + + if (abo->kfd_bo) + amdgpu_amdkfd_unreserve_memory_limit(abo); + + if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node || + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + return; + + reservation_object_lock(bo->base.resv, NULL); + + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); + if (!WARN_ON(r)) { + amdgpu_bo_fence(abo, fence, false); + dma_fence_put(fence); + } + + reservation_object_unlock(bo->base.resv); +} + +/** * amdgpu_bo_fault_reserve_notify - notification about a memory fault * @bo: pointer to a buffer object * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 113fb2feb437..05dde0dd04ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -264,6 +264,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_mem); +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); @@ -307,5 +308,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m); #endif +bool amdgpu_bo_support_uswc(u64 bo_flags); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 2b546567853b..39998f203b49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -325,13 +325,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!amdgpu_sriov_vf(adev)) { - if (is_support_sw_smu(adev)) - current_level = smu_get_performance_level(&adev->smu); - else if (adev->powerplay.pp_funcs->get_performance_level)
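The new amdgpu_bo_release_notify() above is what gives AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE its effect: flagged VRAM buffers are overwritten with AMDGPU_POISON before their backing pages are freed. A hedged sketch of opting a buffer in, assuming the amdgpu_bo_param/amdgpu_bo_create API of this kernel generation; size is a caller-supplied value:

	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;			/* caller-supplied */
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; /* poisoned on free */
	bp.type = ttm_bo_type_kernel;
	r = amdgpu_bo_create(adev, &bp, &bo);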
- current_level = amdgpu_dpm_get_performance_level(adev); - } - if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -355,17 +348,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, goto fail; } - if (amdgpu_sriov_vf(adev)) { - if (amdgim_is_hwperf(adev) && - adev->virt.ops->force_dpm_level) { - mutex_lock(&adev->pm.mutex); - adev->virt.ops->force_dpm_level(adev, level); - mutex_unlock(&adev->pm.mutex); - return count; - } else { - return -EINVAL; + /* handle sriov case here */ + if (amdgpu_sriov_vf(adev)) { + if (amdgim_is_hwperf(adev) && + adev->virt.ops->force_dpm_level) { + mutex_lock(&adev->pm.mutex); + adev->virt.ops->force_dpm_level(adev, level); + mutex_unlock(&adev->pm.mutex); + return count; + } else { + return -EINVAL; } - } + } + + if (is_support_sw_smu(adev)) + current_level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) + current_level = amdgpu_dpm_get_performance_level(adev); if (current_level == level) return count; @@ -746,10 +745,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, } /** - * DOC: ppfeatures + * DOC: pp_features * * The amdgpu driver provides a sysfs API for adjusting what powerplay - * features to be enabled. The file ppfeatures is used for this. And + * features to be enabled. The file pp_features is used for this. And * this is only available for Vega10 and later dGPUs. * * Reading back the file will show you the followings: @@ -761,7 +760,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, * the corresponding bit from original ppfeature masks and input the * new ppfeature masks. */ -static ssize_t amdgpu_set_ppfeature_status(struct device *dev, +static ssize_t amdgpu_set_pp_feature_status(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -778,7 +777,7 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); if (is_support_sw_smu(adev)) { - ret = smu_set_ppfeature_status(&adev->smu, featuremask); + ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); if (ret) return -EINVAL; } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { @@ -790,7 +789,7 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, return count; } -static ssize_t amdgpu_get_ppfeature_status(struct device *dev, +static ssize_t amdgpu_get_pp_feature_status(struct device *dev, struct device_attribute *attr, char *buf) { @@ -798,7 +797,7 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; if (is_support_sw_smu(adev)) { - return smu_get_ppfeature_status(&adev->smu, buf); + return smu_sys_get_pp_feature_mask(&adev->smu, buf); } else if (adev->powerplay.pp_funcs->get_ppfeature_status) return amdgpu_dpm_get_ppfeature_status(adev, buf); @@ -1458,9 +1457,9 @@ static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, static DEVICE_ATTR(mem_busy_percent, S_IRUGO, amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); -static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, - amdgpu_get_ppfeature_status, - amdgpu_set_ppfeature_status); +static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR, + amdgpu_get_pp_feature_status, + amdgpu_set_pp_feature_status); static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, @@ -1625,20 
+1624,16 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (is_support_sw_smu(adev)) { - err = kstrtoint(buf, 10, &value); - if (err) - return err; + err = kstrtoint(buf, 10, &value); + if (err) + return err; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { if (!adev->powerplay.pp_funcs->set_fan_control_mode) return -EINVAL; - err = kstrtoint(buf, 10, &value); - if (err) - return err; - amdgpu_dpm_set_fan_control_mode(adev, value); } @@ -2058,16 +2053,18 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return err; value = value / 1000000; /* convert to Watt */ + if (is_support_sw_smu(adev)) { - adev->smu.funcs->set_power_limit(&adev->smu, value); + err = smu_set_power_limit(&adev->smu, value); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - if (err) - return err; } else { - return -EINVAL; + err = -EINVAL; } + if (err) + return err; + return count; } @@ -2917,10 +2914,10 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, - &dev_attr_ppfeatures); + &dev_attr_pp_features); if (ret) { DRM_ERROR("failed to create device file " - "ppfeatures\n"); + "pp_features\n"); return ret; } } @@ -2974,7 +2971,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) - device_remove_file(adev->dev, &dev_attr_ppfeatures); + device_remove_file(adev->dev, &dev_attr_pp_features); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c027e5e7713e..51fb890e2d3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -53,10 +53,13 @@ static int psp_early_init(void *handle) psp->autoload_supported = false; break; case CHIP_VEGA20: + case CHIP_ARCTURUS: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = false; break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -162,8 +165,8 @@ psp_cmd_submit_buf(struct psp_context *psp, if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_WARN("psp command failed and response status is (%d)\n", - psp->cmd_buf_mem->resp.status); + DRM_WARN("psp command failed and response status is (0x%X)\n", + psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); if (!timeout) { mutex_unlock(&psp->mutex); return -EINVAL; @@ -831,7 +834,6 @@ static int psp_hw_start(struct psp_context *psp) "XGMI: Failed to initialize XGMI session\n"); } - if (psp->adev->psp.ta_fw) { ret = psp_ras_initialize(psp); if (ret) @@ -852,6 +854,24 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_SDMA1: *type = GFX_FW_TYPE_SDMA1; break; + case AMDGPU_UCODE_ID_SDMA2: + *type = GFX_FW_TYPE_SDMA2; + break; + case AMDGPU_UCODE_ID_SDMA3: + *type = GFX_FW_TYPE_SDMA3; + break; + case AMDGPU_UCODE_ID_SDMA4: + *type = GFX_FW_TYPE_SDMA4; + break; + case AMDGPU_UCODE_ID_SDMA5: + *type = GFX_FW_TYPE_SDMA5; + break; + case AMDGPU_UCODE_ID_SDMA6: + *type = GFX_FW_TYPE_SDMA6; + break; + case AMDGPU_UCODE_ID_SDMA7: + *type = GFX_FW_TYPE_SDMA7; + 
break; case AMDGPU_UCODE_ID_CP_CE: *type = GFX_FW_TYPE_CP_CE; break; @@ -980,12 +1000,20 @@ out: if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && (psp_smu_reload_quirk(psp) || psp->autoload_supported)) continue; + if (amdgpu_sriov_vf(adev) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) /*skip ucode loading in SRIOV VF */ continue; + if (psp->autoload_supported && (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) @@ -997,7 +1025,8 @@ out: return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM || + (adev->asic_type == CHIP_NAVI12 && ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fac7aa2c244f..523f43732dee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -30,74 +30,6 @@ #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" -struct ras_ih_data { - /* interrupt bottom half */ - struct work_struct ih_work; - int inuse; - /* IP callback */ - ras_ih_cb cb; - /* full of entries */ - unsigned char *ring; - unsigned int ring_size; - unsigned int element_size; - unsigned int aligned_element_size; - unsigned int rptr; - unsigned int wptr; -}; - -struct ras_fs_data { - char sysfs_name[32]; - char debugfs_name[32]; -}; - -struct ras_err_data { - unsigned long ue_count; - unsigned long ce_count; -}; - -struct ras_err_handler_data { - /* point to bad pages array */ - struct { - unsigned long bp; - struct amdgpu_bo *bo; - } *bps; - /* the count of entries */ - int count; - /* the space can place new entries */ - int space_left; - /* last reserved entry's index + 1 */ - int last_reserved; -}; - -struct ras_manager { - struct ras_common_if head; - /* reference count */ - int use; - /* ras block link */ - struct list_head node; - /* the device */ - struct amdgpu_device *adev; - /* debugfs */ - struct dentry *ent; - /* sysfs */ - struct device_attribute sysfs_attr; - int attr_inuse; - - /* fs node name */ - struct ras_fs_data fs_data; - - /* IH data */ - struct ras_ih_data ih_data; - - struct ras_err_data err_data; -}; - -struct ras_badpage { - unsigned int bp; - unsigned int size; - unsigned int flags; -}; - const char *ras_error_string[] = { "none", "parity", @@ -130,6 +62,9 @@ const char *ras_block_string[] = { #define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) +/* inject address is 52 bits */ +#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) + static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, uint64_t offset, uint64_t size, struct amdgpu_bo **bo_ptr); @@ -223,9 +158,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return -EINVAL; data->head.block = block_id; - data->head.type = memcmp("ue", err, 2) == 0 ? 
- AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE : - AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + /* only ue and ce errors are supported */ + if (!memcmp("ue", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + else if (!memcmp("ce", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + else + return -EINVAL; + data->op = op; if (op == 2) { @@ -310,7 +250,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; struct ras_debug_if data; - struct amdgpu_bo *bo; int ret = 0; ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); @@ -328,17 +267,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: - ret = amdgpu_ras_reserve_vram(adev, - data.inject.address, PAGE_SIZE, &bo); - if (ret) { - /* address was offset, now it is absolute.*/ - data.inject.address += adev->gmc.vram_start; - if (data.inject.address > adev->gmc.vram_end) - break; - } else - data.inject.address = amdgpu_bo_gpu_offset(bo); + if ((data.inject.address >= adev->gmc.mc_vram_size) || + (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + ret = -EINVAL; + break; + } + + /* data.inject.address is offset instead of absolute gpu address */ ret = amdgpu_ras_error_inject(adev, &data.inject); - amdgpu_ras_release_vram(adev, &bo); break; default: ret = -EINVAL; @@ -656,14 +592,42 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, struct ras_query_if *info) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_err_data err_data = {0, 0, 0, NULL}; if (!obj) return -EINVAL; - /* TODO might read the register to read the count */ + + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__UMC: + if (adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, &err_data); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.funcs->query_ras_error_address) + adev->umc.funcs->query_ras_error_address(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, &err_data); + break; + default: + break; + } + + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; + if (err_data.ce_count) + dev_info(adev->dev, "%ld correctable errors detected in %s block\n", + obj->err_data.ce_count, ras_block_str(info->head.block)); + if (err_data.ue_count) + dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", + obj->err_data.ue_count, ras_block_str(info->head.block)); + return 0; } @@ -684,13 +648,22 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, if (!obj) return -EINVAL; - if (block_info.block_id != TA_RAS_BLOCK__UMC) { + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->ras_error_inject) + ret = adev->gfx.funcs->ras_error_inject(adev, info); + else + ret = -EINVAL; + break; + case AMDGPU_RAS_BLOCK__UMC: + ret = psp_ras_trigger_error(&adev->psp, &block_info); + break; + default: DRM_INFO("%s error injection is not supported yet\n", ras_block_str(info->head.block)); - return -EINVAL; + ret = -EINVAL; } - ret = psp_ras_trigger_error(&adev->psp, &block_info); if (ret) DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n", 
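Two behavioural changes in the injection path above are easy to miss: the address written through debugfs is now interpreted as an offset into VRAM, bounded by mc_vram_size and the 52-bit RAS_UMC_INJECT_ADDR_LIMIT, rather than being reserved and translated; and amdgpu_ras_error_inject() now dispatches per block, routing GFX through the new adev->gfx.funcs->ras_error_inject() hook, UMC through psp_ras_trigger_error(), and rejecting everything else. A condensed sketch of the caller-side check, illustrative rather than a verbatim excerpt:

	/* the address is a VRAM offset now, not an absolute GPU address */
	if (data.inject.address >= adev->gmc.mc_vram_size ||
	    data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)
		return -EINVAL;

	ret = amdgpu_ras_error_inject(adev, &data.inject);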
ras_block_str(info->head.block), @@ -816,25 +789,18 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; struct ras_common_if head; int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; - int i; + int i, enabled; ssize_t s; - struct ras_manager *obj; s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); for (i = 0; i < ras_block_count; i++) { head.block = i; + enabled = amdgpu_ras_is_feature_enabled(adev, &head); - if (amdgpu_ras_is_feature_enabled(adev, &head)) { - obj = amdgpu_ras_find_obj(adev, &head); - s += scnprintf(&buf[s], PAGE_SIZE - s, - "%s: %s\n", - ras_block_str(i), - ras_err_str(obj->head.type)); - } else - s += scnprintf(&buf[s], PAGE_SIZE - s, - "%s: disabled\n", - ras_block_str(i)); + s += scnprintf(&buf[s], PAGE_SIZE - s, + "%s ras feature mask: %s\n", + ras_block_str(i), enabled?"on":"off"); } return s; @@ -1054,6 +1020,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) struct ras_ih_data *data = &obj->ih_data; struct amdgpu_iv_entry entry; int ret; + struct ras_err_data err_data = {0, 0, 0, NULL}; while (data->rptr != data->wptr) { rmb(); @@ -1068,19 +1035,19 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) * from the callback to udpate the error type/count, etc */ if (data->cb) { - ret = data->cb(obj->adev, &entry); + ret = data->cb(obj->adev, &err_data, &entry); /* ue will trigger an interrupt, and in that case * we need do a reset to recovery the whole system. * But leave IP do that recovery, here we just dispatch * the error. */ - if (ret == AMDGPU_RAS_UE) { - obj->err_data.ue_count++; + if (ret == AMDGPU_RAS_SUCCESS) { + /* these counts could be left as 0 if + * some blocks do not count error number + */ + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; } - /* Might need get ce count by register, but not all IP - * saves ce count, some IP just use one bit or two bits - * to indicate ce happened. 
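The callback contract changes with this hunk: a RAS interrupt handler now returns AMDGPU_RAS_SUCCESS and reports what it found through the caller-provided ras_err_data, instead of signalling severity with AMDGPU_RAS_UE. A minimal hypothetical implementation of the new ras_ih_cb signature (the name is illustrative and the decode step is elided):

	static int my_block_process_ras_data(struct amdgpu_device *adev,
					     struct ras_err_data *err_data,
					     struct amdgpu_iv_entry *entry)
	{
		/* decode 'entry' here; report one uncorrectable error as an example */
		err_data->ue_count++;
		return AMDGPU_RAS_SUCCESS;
	}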
- */ } } } @@ -1577,6 +1544,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (amdgpu_ras_fs_init(adev)) goto fs_out; + /* ras init for each ras block */ + if (adev->umc.funcs->ras_init) + adev->umc.funcs->ras_init(adev); + DRM_INFO("RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", con->hw_supported, con->supported); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b2841195bd3b..2765f2dbb1e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -52,6 +52,236 @@ enum amdgpu_ras_block { #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) +enum amdgpu_ras_gfx_subblock { + /* CPC */ + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF */ + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, + AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + /* CPG */ + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + /* GDS */ + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI */ + AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ */ + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, + AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges) */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + 
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA */ + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA */ + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + 
AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI */ + AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP */ + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD */ + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank */ + AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker */ + AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + AMDGPU_RAS_BLOCK__GFX_MAX +}; + enum amdgpu_ras_error_type { AMDGPU_RAS_ERROR__NONE = 0, AMDGPU_RAS_ERROR__PARITY = 1, @@ -76,9 +306,6 @@ struct ras_common_if { char name[32]; }; -typedef int (*ras_ih_cb)(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); - struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. 
*/ @@ -108,8 +335,81 @@ struct amdgpu_ras { uint32_t flags; }; -/* interfaces for IP */ +struct ras_fs_data { + char sysfs_name[32]; + char debugfs_name[32]; +}; + +struct ras_err_data { + unsigned long ue_count; + unsigned long ce_count; + unsigned long err_addr_cnt; + uint64_t *err_addr; +}; + +struct ras_err_handler_data { + /* point to bad pages array */ + struct { + unsigned long bp; + struct amdgpu_bo *bo; + } *bps; + /* the count of entries */ + int count; + /* the space can place new entries */ + int space_left; + /* last reserved entry's index + 1 */ + int last_reserved; +}; + +typedef int (*ras_ih_cb)(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct amdgpu_iv_entry *entry); +struct ras_ih_data { + /* interrupt bottom half */ + struct work_struct ih_work; + int inuse; + /* IP callback */ + ras_ih_cb cb; + /* full of entries */ + unsigned char *ring; + unsigned int ring_size; + unsigned int element_size; + unsigned int aligned_element_size; + unsigned int rptr; + unsigned int wptr; +}; + +struct ras_manager { + struct ras_common_if head; + /* reference count */ + int use; + /* ras block link */ + struct list_head node; + /* the device */ + struct amdgpu_device *adev; + /* debugfs */ + struct dentry *ent; + /* sysfs */ + struct device_attribute sysfs_attr; + int attr_inuse; + + /* fs node name */ + struct ras_fs_data fs_data; + + /* IH data */ + struct ras_ih_data ih_data; + + struct ras_err_data err_data; +}; + +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + +/* interfaces for IP */ struct ras_fs_if { struct ras_common_if head; char sysfs_name[32]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4410c97ac9b7..930316e60155 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,7 +29,7 @@ #include <drm/drm_print.h> /* max number of rings */ -#define AMDGPU_MAX_RINGS 24 +#define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 35dd152f9d5c..a9ae0d8a0589 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -25,11 +25,17 @@ #define __AMDGPU_SDMA_H__ /* max number of IP instances */ -#define AMDGPU_MAX_SDMA_INSTANCES 2 +#define AMDGPU_MAX_SDMA_INSTANCES 8 enum amdgpu_sdma_irq { AMDGPU_SDMA_IRQ_INSTANCE0 = 0, AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_SDMA_IRQ_INSTANCE2, + AMDGPU_SDMA_IRQ_INSTANCE3, + AMDGPU_SDMA_IRQ_INSTANCE4, + AMDGPU_SDMA_IRQ_INSTANCE5, + AMDGPU_SDMA_IRQ_INSTANCE6, + AMDGPU_SDMA_IRQ_INSTANCE7, AMDGPU_SDMA_IRQ_LAST }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 63e7d1e01b76..8f8b7a350b8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -444,6 +444,22 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, if (r) goto error; + /* clear the space being freed */ + if (old_mem->mem_type == TTM_PL_VRAM && + (ttm_to_amdgpu_bo(bo)->flags & + AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { + struct dma_fence *wipe_fence = NULL; + + r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, + NULL, &wipe_fence); + if (r) { + goto error; + } else if (wipe_fence) { + dma_fence_put(fence); + fence = wipe_fence; + } + } + /* Always block for VM page tables before committing the new location */ 
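Note that ras_err_data, relocated into the header above, also grows err_addr and err_addr_cnt so that address-capable blocks (UMC) can hand back the physical locations of bad pages. A hypothetical recording step inside a query callback, assuming the caller sized err_data->err_addr beforehand, as max_ras_err_cnt_per_query suggests:

	uint64_t retired_page = 0;	/* stand-in for a decoded bad-page address */

	if (err_data->err_addr &&
	    err_data->err_addr_cnt < adev->umc.max_ras_err_cnt_per_query)
		err_data->err_addr[err_data->err_addr_cnt++] = retired_page;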
if (bo->type == ttm_bo_type_kernel) r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); @@ -1599,6 +1615,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .move = &amdgpu_bo_move, .verify_access = &amdgpu_verify_access, .move_notify = &amdgpu_bo_move_notify, + .release_notify = &amdgpu_bo_release_notify, .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index caa76c693700..bccb8c49e597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,6 +38,8 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 +#define AMDGPU_POISON 0xd0bed0be + struct amdgpu_mman { struct ttm_bo_device bdev; bool mem_global_referenced; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index bfaa0eac3213..dd18ebc2eb01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -269,6 +269,16 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr) DRM_DEBUG("kdb_size_bytes: %u\n", le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes)); } + if (version_minor == 2) { + const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 = + container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0); + DRM_DEBUG("kdb_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_header_version)); + DRM_DEBUG("kdb_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes)); + DRM_DEBUG("kdb_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); + } } else { DRM_ERROR("Unknown PSP ucode version: %u.%u\n", version_major, version_minor); @@ -351,10 +361,14 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_ARCTURUS: + return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index c1fb6dc86440..b34f00d42049 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -90,6 +90,15 @@ struct psp_firmware_header_v1_1 { uint32_t kdb_size_bytes; }; +/* version_major=1, version_minor=2 */ +struct psp_firmware_header_v1_2 { + struct psp_firmware_header_v1_0 v1_0; + uint32_t reserve[3]; + uint32_t kdb_header_version; + uint32_t kdb_offset_bytes; + uint32_t kdb_size_bytes; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -262,6 +271,12 @@ union amdgpu_firmware_header { enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_SDMA0 = 0, AMDGPU_UCODE_ID_SDMA1, + AMDGPU_UCODE_ID_SDMA2, + AMDGPU_UCODE_ID_SDMA3, + AMDGPU_UCODE_ID_SDMA4, + AMDGPU_UCODE_ID_SDMA5, + AMDGPU_UCODE_ID_SDMA6, + AMDGPU_UCODE_ID_SDMA7, AMDGPU_UCODE_ID_CP_CE, AMDGPU_UCODE_ID_CP_PFP, AMDGPU_UCODE_ID_CP_ME, @@ -281,6 +296,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_UVD1, AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1, AMDGPU_UCODE_ID_DMCU_ERAM, AMDGPU_UCODE_ID_DMCU_INTV, AMDGPU_UCODE_ID_VCN0_RAM, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h new file mode 100644 index 000000000000..975afa04df09 --- /dev/null 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_UMC_H__ +#define __AMDGPU_UMC_H__ + +/* implement 64-bit REG operations via the 32-bit interface */ +#define RREG64_UMC(reg) (RREG32(reg) | \ + ((uint64_t)RREG32((reg) + 1) << 32)) +#define WREG64_UMC(reg, v) \ + do { \ + WREG32((reg), lower_32_bits(v)); \ + WREG32((reg) + 1, upper_32_bits(v)); \ + } while (0) + +/* + * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data, + * uint32_t umc_reg_offset, uint32_t channel_index) + */ +#define amdgpu_umc_for_each_channel(func) \ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \ + uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \ + for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \ + /* enable the index mode to query error count per channel */ \ + adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \ + for (channel_inst = 0; \ + channel_inst < adev->umc.channel_inst_num; \ + channel_inst++) { \ + /* calc the register offset according to channel instance */ \ + umc_reg_offset = adev->umc.channel_offs * channel_inst; \ + /* get channel index of interleaved memory */ \ + channel_index = adev->umc.channel_idx_tbl[ \ + umc_inst * adev->umc.channel_inst_num + channel_inst]; \ + (func)(adev, err_data, umc_reg_offset, channel_index); \ + } \ + } \ + adev->umc.funcs->disable_umc_index_mode(adev); + +struct amdgpu_umc_funcs { + void (*ras_init)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); + void (*enable_umc_index_mode)(struct amdgpu_device *adev, + uint32_t umc_instance); + void (*disable_umc_index_mode)(struct amdgpu_device *adev); +}; + +struct amdgpu_umc { + /* max error count in one ras query call */ + uint32_t max_ras_err_cnt_per_query; + /* number of umc channel instances with memory map register access */ + uint32_t channel_inst_num; + /* number of umc instances with memory map register access */ + uint32_t umc_inst_num; + /* UMC register per channel offset */ + uint32_t channel_offs; + /* channel index table of interleaved memory */ + const uint32_t *channel_idx_tbl; + + const struct amdgpu_umc_funcs *funcs; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
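The amdgpu_umc_for_each_channel() macro above is deliberately unhygienic: it declares err_data and the loop counters itself and expects the surrounding function to provide adev and a void *ras_error_status parameter, and it assumes the adev->umc.funcs table is populated. A hedged sketch of how a UMC IP version might plug into it; the function names, register offset, and bit are placeholders, not real hardware values:

	/* placeholder register/bit values for illustration only */
	#define MY_UMC_STATUS_OFFSET	0x0
	#define MY_UMC_UE_BIT		(1ULL << 0)

	/* hypothetical per-channel worker; matches the signature documented
	 * above the macro */
	static void my_umc_query_channel(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t umc_reg_offset,
					 uint32_t channel_index)
	{
		/* 64-bit status read composed from two 32-bit accesses */
		uint64_t status = RREG64_UMC(umc_reg_offset + MY_UMC_STATUS_OFFSET);

		if (status & MY_UMC_UE_BIT)
			err_data->ue_count++;
	}

	static void my_umc_query_ras_error_count(struct amdgpu_device *adev,
						 void *ras_error_status)
	{
		amdgpu_umc_for_each_channel(my_umc_query_channel);
	}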
index 2e12eeb314a7..47086cdbb413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -46,12 +46,18 @@ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" #define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin" #define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" +#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin" #define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" +#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" +#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); MODULE_FIRMWARE(FIRMWARE_RAVEN2); +MODULE_FIRMWARE(FIRMWARE_ARCTURUS); MODULE_FIRMWARE(FIRMWARE_NAVI10); +MODULE_FIRMWARE(FIRMWARE_NAVI14); +MODULE_FIRMWARE(FIRMWARE_NAVI12); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -61,7 +67,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; - int r; + int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -74,12 +80,27 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) else fw_name = FIRMWARE_RAVEN; break; + case CHIP_ARCTURUS: + fw_name = FIRMWARE_ARCTURUS; + break; case CHIP_NAVI10: fw_name = FIRMWARE_NAVI10; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case CHIP_NAVI14: + fw_name = FIRMWARE_NAVI14; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_NAVI12: + fw_name = FIRMWARE_NAVI12; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -133,12 +154,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); - return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } } if (adev->vcn.indirect_sram) { @@ -156,26 +183,30 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { - int i; - - kvfree(adev->vcn.saved_bo); + int i, j; if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); + &adev->vcn.dpg_sram_gpu_addr, + (void **)&adev->vcn.dpg_sram_cpu_addr); } - amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, - (void **)&adev->vcn.cpu_addr); + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + kvfree(adev->vcn.inst[j].saved_bo); + + amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, + &adev->vcn.inst[j].gpu_addr, + (void **)&adev->vcn.inst[j].cpu_addr); - amdgpu_ring_fini(&adev->vcn.ring_dec); + 
amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); - amdgpu_ring_fini(&adev->vcn.ring_jpeg); + amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); + } release_firmware(adev->vcn.fw); @@ -186,21 +217,25 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.vcpu_bo == NULL) - return 0; - - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return 0; - adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL); - if (!adev->vcn.saved_bo) - return -ENOMEM; + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - memcpy_fromio(adev->vcn.saved_bo, ptr, size); + adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); + if (!adev->vcn.inst[i].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + } return 0; } @@ -208,32 +243,36 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; - if (adev->vcn.vcpu_bo == NULL) - return -EINVAL; - - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; - - if (adev->vcn.saved_bo != NULL) { - memcpy_toio(ptr, adev->vcn.saved_bo, size); - kvfree(adev->vcn.saved_bo); - adev->vcn.saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; + + if (adev->vcn.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + kvfree(adev->vcn.inst[i].saved_bo); + adev->vcn.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } + memset_io(ptr, 0, size); } - memset_io(ptr, 0, size); } - return 0; } @@ -241,35 +280,40 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); - unsigned int fences = 0; - unsigned int i; + unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; + unsigned int i, j; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); - } + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (i = 0; i < 
adev->vcn.num_enc_rings; ++i) { + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - struct dpg_pause_state new_state; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; - if (fences) - new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + if (fence[j]) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); - } + adev->vcn.pause_dpg_mode(adev, &new_state); + } - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); + fences += fence[j]; + } if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); - if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -286,7 +330,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); - if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -299,14 +343,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) unsigned int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); + fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); } if (fences) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) + if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; @@ -332,7 +376,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; @@ -340,7 +384,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.scratch9); + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -651,7 +695,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; @@ -661,7 +705,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.jpeg_pitch); + tmp = 
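Running through all of the VCN hunks above is the same conversion: per-engine state (ring_dec, ring_enc[], ring_jpeg, vcpu_bo, saved_bo) moves from struct amdgpu_vcn into adev->vcn.inst[], and every loop skips instances masked off in harvest_config. Distilled into one illustrative fragment, with the ring_fini call standing in for whatever per-instance work applies:

	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;	/* instance is fused off */
		/* ... per-instance work, e.g.: */
		amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);
	}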
RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -735,7 +779,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) } for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.jpeg_pitch); + tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 99f14fcc1460..dface275c81a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -30,6 +30,12 @@ #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 +#define AMDGPU_MAX_VCN_INSTANCES 2 + +#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) +#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) + +#define VCN_DEC_KMD_CMD 0x80000000 #define VCN_DEC_CMD_FENCE 0x00000000 #define VCN_DEC_CMD_TRAP 0x00000001 #define VCN_DEC_CMD_WRITE_REG 0x00000004 @@ -145,34 +151,49 @@ struct amdgpu_vcn_reg{ unsigned data1; unsigned cmd; unsigned nop; + unsigned context_id; + unsigned ib_vmid; + unsigned ib_bar_low; + unsigned ib_bar_high; + unsigned ib_size; + unsigned gp_scratch8; unsigned scratch9; unsigned jpeg_pitch; }; -struct amdgpu_vcn { +struct amdgpu_vcn_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; - unsigned fw_version; void *saved_bo; - struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; + struct amdgpu_vcn_reg external; +}; + +struct amdgpu_vcn { + unsigned fw_version; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; struct dpg_pause_state pause_state; - struct amdgpu_vcn_reg internal, external; - int (*pause_dpg_mode)(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); bool indirect_sram; struct amdgpu_bo *dpg_sram_bo; void *dpg_sram_cpu_addr; uint64_t dpg_sram_gpu_addr; uint32_t *dpg_sram_curr_addr; + + uint8_t num_vcn_inst; + struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + struct amdgpu_vcn_reg internal; + + unsigned harvest_config; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 59dd204498c5..e32ae906d797 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -430,48 +430,3 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) return clk; } - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) -{ - struct amdgpu_virt *virt = &adev->virt; - - if (virt->ops && virt->ops->init_reg_access_mode) - virt->ops->init_reg_access_mode(adev); -} - -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) - ret = true; - - return ret; -} - -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) - && !(amdgpu_sriov_runtime(adev))) - ret = true; - - return ret; -} - -bool 
amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) - ret = true; - - return ret; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index f5107731e9c4..b0b2bdc750df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -48,12 +48,6 @@ struct amdgpu_vf_error_buffer { uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; -/* According to the fw feature, some new reg access modes are supported */ -#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */ -#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */ -#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */ -#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */ - /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -65,7 +59,6 @@ struct amdgpu_virt_ops { void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); - void (*init_reg_access_mode)(struct amdgpu_device *adev); }; /* @@ -315,10 +308,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 95eef0ac2829..c8244ce184e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1574,7 +1574,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, flags &= ~AMDGPU_PTE_EXECUTABLE; flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - if (adev->asic_type == CHIP_NAVI10) { + if (adev->asic_type >= CHIP_NAVI10) { flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); } else { @@ -3061,12 +3061,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* current, we only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 489a162ca620..2eda3a8c330d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -90,7 +90,7 @@ struct amdgpu_bo_list_entry; | AMDGPU_PTE_WRITEABLE \ | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) -/* NAVI10 only */ +/* gfx10 */ #define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) #define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) @@ -100,9 
+100,10 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 /* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 2 -#define AMDGPU_GFXHUB 0 -#define AMDGPU_MMHUB 1 +#define AMDGPU_MAX_VMHUBS 3 +#define AMDGPU_GFXHUB_0 0 +#define AMDGPU_MMHUB_0 1 +#define AMDGPU_MMHUB_1 2 /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index d11eba09eadd..65aae75f80fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -25,7 +25,7 @@ #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "amdgpu_smu.h" - +#include "df/df_3_6_offset.h" static DEFINE_MUTEX(xgmi_mutex); @@ -131,9 +131,37 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, } +#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) +static ssize_t amdgpu_xgmi_show_error(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in; + uint64_t fica_out; + unsigned int error_count = 0; + + ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); + ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); -static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); + fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); + if (fica_out != 0x1f) + pr_err("xGMI error counters not enabled!\n"); + + fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + + if ((fica_out & 0xffff) == 2) + error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); + adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + + return snprintf(buf, PAGE_SIZE, "%d\n", error_count); +} + + +static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); +static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -148,6 +176,12 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, return ret; } + /* Create xgmi error file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_error); + if (ret) + pr_err("failed to create xgmi_error\n"); + + /* Create sysfs link to hive info folder on the first device */ if (adev != hive->adev) { ret = sysfs_create_link(&adev->dev->kobj, hive->kobj, @@ -248,7 +282,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); - if (is_support_sw_smu(adev)) + if (is_support_sw_smu_xgmi(adev)) ret = smu_set_xgmi_pstate(&adev->smu, pstate); if (ret) dev_err(adev->dev, @@ -296,23 +330,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) struct amdgpu_xgmi *entry; struct amdgpu_device *tmp_adev = NULL; - int count = 0, ret = -EINVAL; + int count = 0, ret = 0; if (!adev->gmc.xgmi.supported) return 0; - ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get node id\n"); - return ret; - } + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get hive id\n"); + return ret; + } - ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get hive id\n"); - return ret; + ret = 
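
amdgpu_xgmi_show_error above folds one 64-bit FICA status read into a count: the low 16 bits carry a status code that must equal 2 before the counters are trusted, and bits 62 and 63 each contribute one error. A stand-alone reproduction of that bit arithmetic (the FICAA constants are copied from the patch; no further hardware meaning is assumed):

#include <stdint.h>
#include <stdio.h>

#define XGMI_SET_FICAA(o)	((o) | 0x456801)	/* as in the patch */

static unsigned xgmi_decode_error_count(uint64_t fica_out)
{
	if ((fica_out & 0xffff) != 2)	/* status code in the low 16 bits */
		return 0;
	/* one error flag in bit 62, another in bit 63 */
	return (unsigned)(((fica_out >> 62) & 0x1) + (fica_out >> 63));
}

int main(void)
{
	printf("ctl=0x%x status=0x%x count=%u\n",
	       XGMI_SET_FICAA(0x200), XGMI_SET_FICAA(0x208),
	       xgmi_decode_error_count((3ULL << 62) | 2));	/* count=2 */
	return 0;
}

Writing the status register back to zero afterwards, as the attribute does with set_fica, makes the count consume-on-read.
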
psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get node id\n"); + return ret; + } + } else { + adev->gmc.xgmi.hive_id = 16; + adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16; } hive = amdgpu_get_xgmi_hive(adev, 1); @@ -332,29 +371,32 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) top_info->num_nodes = count; hive->number_devices = count; - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - /* update node list for other device in the hive */ - if (tmp_adev != adev) { - top_info = &tmp_adev->psp.xgmi_context.top_info; - top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; - top_info->num_nodes = count; + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + /* update node list for other device in the hive */ + if (tmp_adev != adev) { + top_info = &tmp_adev->psp.xgmi_context.top_info; + top_info->nodes[count - 1].node_id = + adev->gmc.xgmi.node_id; + top_info->num_nodes = count; + } + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + goto exit; } - ret = amdgpu_xgmi_update_topology(hive, tmp_adev); - if (ret) - goto exit; - } - /* get latest topology info for each device from psp */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info); - if (ret) { - dev_err(tmp_adev->dev, - "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.node_id, - tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ - goto exit; + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + /* To do : continue with some node failed or disable the whole hive */ + goto exit; + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c new file mode 100644 index 000000000000..4853899b1824 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c @@ -0,0 +1,59 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "arct_ip_offset.h" + +int arct_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks; only initialize the blocks needed by our driver */ + uint32_t i; + for (i = 0; i < MAX_INSTANCE; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i])); + adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i])); + adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i])); + adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i])); + adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i])); + adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + } + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index 89b32b6b81c8..7e6c0bc3e8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -74,6 +74,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: athub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ?
true : false); athub_v2_0_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 1ffbc0d3d7a1..b81bb414fcb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1291,6 +1291,12 @@ static int cik_asic_reset(struct amdgpu_device *adev) return r; } +static enum amd_reset_method +cik_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 cik_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1823,6 +1829,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .read_bios_from_rom = &cik_read_bios_from_rom, .read_register = &cik_read_register, .reset = &cik_asic_reset, + .reset_method = &cik_asic_reset_method, .set_vga_state = &cik_vga_set_state, .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 3cc0a16649f9..4c6d792d51a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -457,7 +457,10 @@ static int dce_virtual_hw_init(void *handle) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_ARCTURUS: case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: break; default: DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index ef6e91f9f51c..5850c8e34caa 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -93,6 +93,96 @@ const struct attribute_group *df_v3_6_attr_groups[] = { NULL }; +static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, + uint32_t ficaa_val) +{ + unsigned long flags, address, data; + uint32_t ficadl_val, ficadh_val; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + ficadl_val = RREG32(data); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + ficadh_val = RREG32(data); + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val); +} + +static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + WREG32(data, ficadl_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + WREG32(data, ficadh_val); + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_rreg - read perfmon lo and hi + * + * required to be atomic. 
no mmio method is provided, so the subsequent lo + and hi reads must be kept paired to preserve the df finite state machine + */ +static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t *lo_val, + uint32_t hi_addr, uint32_t *hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + *lo_val = RREG32(data); + WREG32(address, hi_addr); + *hi_val = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_wreg - write to perfmon lo and hi + * + * required to be atomic. no mmio method is provided, so no reads may occur + * between the data writes, in order to preserve the data fabric's finite state machine. + */ +static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, + uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + WREG32(data, lo_val); + WREG32(address, hi_addr); + WREG32(data, hi_val); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + /* get the number of df counters available */ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, struct device_attribute *attr, @@ -268,6 +358,10 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, uint32_t *lo_val, uint32_t *hi_val) { + + uint32_t eventsel, instance, unitmask; + uint32_t instance_10, instance_5432, instance_76; + df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { @@ -276,40 +370,33 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, return -ENXIO; } - if (lo_val && hi_val) { - uint32_t eventsel, instance, unitmask; - uint32_t instance_10, instance_5432, instance_76; + eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; + unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; + instance = DF_V3_6_GET_INSTANCE(config); - eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; - unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; - instance = DF_V3_6_GET_INSTANCE(config); + instance_10 = instance & 0x3; + instance_5432 = (instance >> 2) & 0xf; + instance_76 = (instance >> 6) & 0x3; - instance_10 = instance & 0x3; - instance_5432 = (instance >> 2) & 0xf; - instance_76 = (instance >> 6) & 0x3; + *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22); + *hi_val = (instance_76 << 29) | instance_5432; - *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel; - *hi_val = (instance_76 << 29) | instance_5432; - } + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val); return 0; } -/* assign df performance counters for read */ -static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, - uint64_t config, - int *is_assigned) +/* add df performance counters for read */ +static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, + uint64_t config) { int i, target_cntr; - *is_assigned = 0; - target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - if (target_cntr >= 0) { - *is_assigned = 1; + if (target_cntr >= 0) return 0; - } for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { if (adev->df_perfmon_config_assign_mask[i] == 0U) { @@ -344,45 +431,13 @@ static void
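
The refactored df_v3_6_pmc_get_ctrl_settings now always produces both control words and folds the enable bit (bit 22) into lo_val directly, which is why pmc_start and pmc_stop below can write the whole pair or zeros instead of read-modify-writing bit 22 through RREG32_PCIE. The packing itself, as a stand-alone helper (the DF_V3_6_GET_* accessors that pull the fields out of config are driver-defined and not reproduced here):

#include <stdint.h>
#include <stdio.h>

/* Same packing as the refactored df_v3_6_pmc_get_ctrl_settings: the
 * 8-bit instance is scattered across three fields and bit 22 of the
 * lo word is the counter enable. */
static void pmc_pack_ctrl(uint32_t eventsel, uint32_t instance,
			  uint32_t unitmask, uint32_t *lo, uint32_t *hi)
{
	uint32_t instance_10 = instance & 0x3;
	uint32_t instance_5432 = (instance >> 2) & 0xf;
	uint32_t instance_76 = (instance >> 6) & 0x3;

	*lo = ((unitmask & 0xf) << 8) | (instance_10 << 6) |
	      (eventsel & 0x3f) | (1u << 22);
	*hi = (instance_76 << 29) | instance_5432;
}

int main(void)
{
	uint32_t lo, hi;

	pmc_pack_ctrl(0x07, 0x5a, 0xf, &lo, &hi);
	printf("lo=%08x hi=%08x\n", lo, hi);	/* lo=00400f87 hi=20000006 */
	return 0;
}
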
df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, if ((lo_base_addr == 0) || (hi_base_addr == 0)) return; - WREG32_PCIE(lo_base_addr, 0UL); - WREG32_PCIE(hi_base_addr, 0UL); -} - - -static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev, - uint64_t config) -{ - uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - int ret, is_assigned; - - ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); - - if (ret || is_assigned) - return ret; - - ret = df_v3_6_pmc_get_ctrl_settings(adev, - config, - &lo_base_addr, - &hi_base_addr, - &lo_val, - &hi_val); - - if (ret) - return ret; - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", - config, lo_base_addr, hi_base_addr, lo_val, hi_val); - - WREG32_PCIE(lo_base_addr, lo_val); - WREG32_PCIE(hi_base_addr, hi_val); - - return ret; + df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); } static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, int is_enable) { - uint32_t lo_base_addr, hi_base_addr, lo_val; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; int ret = 0; switch (adev->asic_type) { @@ -391,24 +446,20 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, df_v3_6_reset_perfmon_cntr(adev, config); if (is_enable) { - ret = df_v3_6_add_perfmon_cntr(adev, config); + ret = df_v3_6_pmc_add_cntr(adev, config); } else { ret = df_v3_6_pmc_get_ctrl_settings(adev, config, &lo_base_addr, &hi_base_addr, - NULL, - NULL); + &lo_val, + &hi_val); if (ret) return ret; - lo_val = RREG32_PCIE(lo_base_addr); - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", - config, lo_base_addr, hi_base_addr, lo_val); - - WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val, + hi_base_addr, hi_val); } break; @@ -422,7 +473,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, int is_disable) { - uint32_t lo_base_addr, hi_base_addr, lo_val; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; int ret = 0; switch (adev->asic_type) { @@ -431,18 +482,13 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, config, &lo_base_addr, &hi_base_addr, - NULL, - NULL); + &lo_val, + &hi_val); if (ret) return ret; - lo_val = RREG32_PCIE(lo_base_addr); - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", - config, lo_base_addr, hi_base_addr, lo_val); - - WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); + df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); if (is_disable) df_v3_6_pmc_release_cntr(adev, config); @@ -471,8 +517,8 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, if ((lo_base_addr == 0) || (hi_base_addr == 0)) return; - lo_val = RREG32_PCIE(lo_base_addr); - hi_val = RREG32_PCIE(hi_base_addr); + df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, + hi_base_addr, &hi_val); *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); @@ -480,7 +526,7 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, *count = 0; DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", - config, lo_base_addr, hi_base_addr, lo_val, hi_val); + config, lo_base_addr, hi_base_addr, lo_val, hi_val); break; @@ -499,5 +545,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .get_clockgating_state = df_v3_6_get_clockgating_state, .pmc_start = df_v3_6_pmc_start, .pmc_stop = df_v3_6_pmc_stop, - .pmc_get_count = df_v3_6_pmc_get_count + .pmc_get_count = df_v3_6_pmc_get_count, + .get_fica = df_v3_6_get_fica, + 
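
df_v3_6_get_fica and the perfmon helpers above are all instances of the classic index/data indirection: write the indirect address through the PCIe index port, then move the payload through the data port, with the whole sequence held under pcie_idx_lock so the two halves belong to one access. A userspace model of the pattern (the port and register numbers are stand-ins, not the real SMN offsets):

#include <stdint.h>
#include <stdio.h>

enum { IDX_PORT, DATA_PORT };

/* stand-in SMN register numbers; the driver uses the real
 * smnDF_PIE_AON_FabricIndirectConfigAccess* offsets instead */
#define FICAA_SEL	0x10
#define FICAD_LO	0x11
#define FICAD_HI	0x12

static uint32_t smn[0x20];	/* fake SMN register file */
static uint32_t selected;	/* state held by the index port */

static void wreg(int port, uint32_t v)
{
	if (port == IDX_PORT)
		selected = v;		/* select an SMN register */
	else
		smn[selected] = v;	/* access it through the data port */
}

static uint32_t rreg(int port)
{
	return port == IDX_PORT ? selected : smn[selected];
}

/* Mirrors df_v3_6_get_fica: program the access address, then read the
 * payload back as two 32-bit halves. In the driver the whole sequence
 * runs under pcie_idx_lock so the halves cannot be torn apart. */
static uint64_t get_fica(uint32_t ficaa_val)
{
	uint32_t lo, hi;

	wreg(IDX_PORT, FICAA_SEL);
	wreg(DATA_PORT, ficaa_val);
	wreg(IDX_PORT, FICAD_LO);
	lo = rreg(DATA_PORT);
	wreg(IDX_PORT, FICAD_HI);
	hi = rreg(DATA_PORT);
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	smn[FICAD_LO] = 0x1f;	/* pretend the counters are enabled */
	printf("0x%llx\n", (unsigned long long)get_fica(0x456a01));
	return 0;
}

The explicit (uint64_t) cast before the shift plays the same role as the & 0xFFFFFFFFFFFFFFFF in the patch: both promote the high half to 64 bits so the << 32 is well defined.
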
.set_fica = df_v3_6_set_fica }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 32773b7523d2..43427a3148b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -20,8 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + +#include <linux/delay.h> +#include <linux/kernel.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_psp.h" @@ -56,6 +60,9 @@ #define F32_CE_PROGRAM_RAM_SIZE 65536 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define mmCGTT_GS_NGG_CLK_CTRL 0x5087 +#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -63,6 +70,20 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin"); MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi14_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi12_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi12_me.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi12_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -109,6 +130,99 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = /* Pending on emulation bring up */ }; +static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 
0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = +{ + /* Pending on emulation bring up */ +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = +{ + /* Pending on emulation bring up */ +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -250,6 +364,22 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_0_nv10, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10)); break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_1, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_nv14, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_2, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_2_nv12, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12)); + break; default: break; } @@ -331,7 +461,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) if (amdgpu_emu_mode == 1) msleep(1); else - DRM_UDELAY(1); + udelay(1); } if (i < adev->usec_timeout) { if (amdgpu_emu_mode == 1) @@ -481,6 +611,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; default: BUG(); } @@ -1026,6 +1162,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1133,6 +1271,8 @@ static int gfx_v10_0_sw_init(void *handle) switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; @@ -1452,6 +1592,25 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
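
The golden-settings tables above are (register, and_mask, or_value) triples consumed by soc15_program_register_sequence. A sketch of the apply loop, under the assumption that it is a masked read-modify-write with a full-mask fast path; the struct layout and fake register file here are illustrative, not the driver's:

#include <stdint.h>
#include <stdio.h>

struct reg_golden {
	uint32_t reg;		/* register offset */
	uint32_t and_mask;	/* bits owned by this entry */
	uint32_t or_value;	/* value to put in those bits */
};

static uint32_t mmio[0x10000];	/* fake register file */
static uint32_t rreg32(uint32_t r) { return mmio[r & 0xffff]; }
static void wreg32(uint32_t r, uint32_t v) { mmio[r & 0xffff] = v; }

static void program_register_sequence(const struct reg_golden *regs,
				      unsigned count)
{
	for (unsigned i = 0; i < count; ++i) {
		uint32_t tmp;

		if (regs[i].and_mask == 0xffffffff) {
			tmp = regs[i].or_value;	/* full overwrite */
		} else {
			tmp = rreg32(regs[i].reg);
			tmp &= ~regs[i].and_mask;	/* clear owned bits */
			tmp |= regs[i].or_value;	/* install new value */
		}
		wreg32(regs[i].reg, tmp);
	}
}

int main(void)
{
	/* masks lifted from one CGTT entry in the gc_10_1_1 table above */
	const struct reg_golden g = { 0x5087, 0xffff8fff, 0xffff8100 };

	mmio[g.reg] = 0x12345678;
	program_register_sequence(&g, 1);
	printf("%08x\n", (unsigned)mmio[g.reg]);	/* ffffd100 */
	return 0;
}
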
+ */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); + } +} + + static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) { int i, j, k; @@ -1461,7 +1620,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) u32 utcl_invreq_disable = 0; /* * GCRD_TARGETS_DISABLE field contains - * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0] + * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0] + * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0] */ u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask( 2 * max_wgp_per_sh + /* TCP */ 2 * max_wgp_per_sh + /* SQC */ 4); /* GL1C */ /* * UTCL1_UTCL0_INVREQ_DISABLE field contains - * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] + * for Navi10/Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] + * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0] */ u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask( 2 * max_wgp_per_sh + /* TCP */ 2 * max_wgp_per_sh + /* SQC */ 4 + /* RMI */ 1); /* SQG */ - if (adev->asic_type == CHIP_NAVI10) { + if (adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12) { mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { @@ -1535,7 +1698,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -1552,6 +1715,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v10_0_init_compute_vmid(adev); + gfx_v10_0_init_gds_vmid(adev); } @@ -1624,9 +1788,9 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, * hence no handshake between SMU & RLC * GFXOFF will be disabled */ - rlc_pg_cntl |= 0x80000; + rlc_pg_cntl |= 0x800000; } else - rlc_pg_cntl &= ~0x80000; + rlc_pg_cntl &= ~0x800000; WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl); } @@ -4037,6 +4201,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, bool enable = (state == AMD_PG_STATE_GATE) ? true : false; switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: if (!enable) { amdgpu_gfx_off_ctrl(adev, false); cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -4056,6 +4221,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ?
true : false); break; @@ -4462,7 +4629,7 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) if (ring->trail_seq == le32_to_cpu(*(ring->trail_fence_cpu_addr))) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) { @@ -4936,7 +5103,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_gfx, .get_wptr = gfx_v10_0_ring_get_wptr_gfx, .set_wptr = gfx_v10_0_ring_set_wptr_gfx, @@ -4987,7 +5154,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5020,7 +5187,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5097,6 +5264,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 21187275dfd3..791ba398f007 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1890,6 +1890,24 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
+ */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } +} + static void gfx_v7_0_config_init(struct amdgpu_device *adev) { adev->gfx.config.double_offchip_lds_buf = 1; @@ -1968,6 +1986,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_gds_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 751567f78567..87dd55e9d72b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,6 +1321,39 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } +static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, + AMDGPU_GEM_DOMAIN_VRAM); + if (!r) + adev->gfx.rlc.clear_state_gpu_addr = + amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return r; +} + +static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.rlc.clear_state_obj) + return; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); + if (likely(r == 0)) { + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } +} + static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -3717,6 +3750,24 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
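
gfx_v8_0_csb_vram_pin above is the standard buffer-object pinning idiom: reserve, pin, record the GPU address, unreserve, with the unpin path mirroring it. A reduced model of the same control flow (struct bo and its helpers are hypothetical stand-ins for amdgpu_bo and its API):

#include <stdio.h>

/* hypothetical, minimal stand-in for the amdgpu_bo reserve/pin API */
struct bo {
	int reserved;
	int pinned;
	unsigned long long gpu_addr;
};

static int bo_reserve(struct bo *bo) { bo->reserved = 1; return 0; }
static void bo_unreserve(struct bo *bo) { bo->reserved = 0; }
static int bo_pin(struct bo *bo) { bo->pinned = 1; bo->gpu_addr = 0x100000; return 0; }
static void bo_unpin(struct bo *bo) { bo->pinned = 0; }

/* Mirrors gfx_v8_0_csb_vram_pin: the pin and the address query both
 * happen while the BO is reserved, and the BO is unreserved on every
 * path, success or failure. */
static int csb_pin(struct bo *bo, unsigned long long *gpu_addr)
{
	int r = bo_reserve(bo);

	if (r)
		return r;
	r = bo_pin(bo);
	if (!r)
		*gpu_addr = bo->gpu_addr;
	bo_unreserve(bo);
	return r;
}

/* Mirrors gfx_v8_0_csb_vram_unpin: quietly skip a BO that was never
 * created, and only unpin when the reserve succeeded. */
static void csb_unpin(struct bo *bo)
{
	if (!bo)
		return;
	if (bo_reserve(bo) == 0) {
		bo_unpin(bo);
		bo_unreserve(bo);
	}
}

int main(void)
{
	struct bo bo = { 0 };
	unsigned long long addr = 0;

	if (csb_pin(&bo, &addr) == 0)
		printf("pinned at 0x%llx\n", addr);
	csb_unpin(&bo);
	return 0;
}
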
+ */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } +} + static void gfx_v8_0_config_init(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -3783,6 +3834,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v8_0_init_compute_vmid(adev); + gfx_v8_0_init_gds_vmid(adev); mutex_lock(&adev->grbm_idx_mutex); /* @@ -4785,6 +4837,10 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); + r = gfx_v8_0_csb_vram_pin(adev); + if (r) + return r; + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4901,6 +4957,9 @@ static int gfx_v8_0_hw_fini(void *handle) else pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); + + gfx_v8_0_csb_vram_unpin(adev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1cf639a51178..52a6fd12e266 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -104,6 +104,390 @@ MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); + +#define mmTCP_CHAN_STEER_0_ARCT 0x0b03 +#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_1_ARCT 0x0b04 +#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_2_ARCT 0x0b09 +#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a +#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b +#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c +#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 + +enum ta_ras_gfx_subblock { + /*CPC*/ + TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, + TA_RAS_BLOCK__GFX_CPC_UCODE, + TA_RAS_BLOCK__GFX_DC_STATE_ME1, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, + TA_RAS_BLOCK__GFX_DC_STATE_ME2, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF*/ + TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, + TA_RAS_BLOCK__GFX_CPF_TAG, + TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, + /* CPG*/ + TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_TAG, + TA_RAS_BLOCK__GFX_CPG_TAG, + TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, + /* GDS*/ + TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI*/ + TA_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_LDS_D, + TA_RAS_BLOCK__GFX_SQ_LDS_I, + TA_RAS_BLOCK__GFX_SQ_VGPR, 
/* VGPR = SP*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges)*/ + TA_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0*/ + TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, + TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + TA_RAS_BLOCK__GFX_SQC_INDEX0_START, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_INDEX0_END = + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1*/ + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX1_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2*/ + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX2_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA*/ + TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_AFIFO, + TA_RAS_BLOCK__GFX_TA_FL_LFIFO, + TA_RAS_BLOCK__GFX_TA_FX_LFIFO, + TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA*/ + TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges)*/ + TA_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0*/ + TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1*/ + TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + TA_RAS_BLOCK__GFX_TCC_INDEX1_END = + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2*/ + TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + 
TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + TA_RAS_BLOCK__GFX_TCC_INDEX2_END = + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3*/ + TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_INDEX3_END = + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4*/ + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX4_END = + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI*/ + TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP*/ + TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, + TA_RAS_BLOCK__GFX_TCP_VM_FIFO, + TA_RAS_BLOCK__GFX_TCP_DB_RAM, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD*/ + TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + TA_RAS_BLOCK__GFX_TD_CS_FIFO, + TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges)*/ + TA_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0*/ + TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1*/ + TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2*/ + TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank*/ + TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker*/ + TA_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache*/ + TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache*/ + TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + TA_RAS_BLOCK__GFX_MAX +}; + +struct ras_gfx_subblock { + unsigned char *name; + int ta_subblock; + int hw_supported_error_type; + int sw_supported_error_type; +}; + +#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ + [AMDGPU_RAS_BLOCK__##subblock] = { \ + #subblock, \ + TA_RAS_BLOCK__##subblock, \ + ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ + 
(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ + } + +static const struct ras_gfx_subblock ras_gfx_subblocks[] = { + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + 
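
Each AMDGPU_RAS_SUB_BLOCK entry above packs its eight 0/1 arguments into two small bitfields: hw_supported_error_type from (a, b, c, d) and sw_supported_error_type from (e, f, g, h), at the bit positions spelled out in the macro. Decoding one listed entry by hand (what each bit position means is defined elsewhere in the RAS code and not assumed here):

#include <stdio.h>

/* Same packing as the AMDGPU_RAS_SUB_BLOCK macro above. */
static int pack_hw(int a, int b, int c, int d)
{
	return a | (b << 1) | (c << 2) | (d << 3);
}

static int pack_sw(int e, int f, int g, int h)
{
	return (e << 1) | (f << 3) | g | (h << 2);
}

int main(void)
{
	/* GFX_CPC_SCRATCH is listed as (0, 1, 1, 1, 1, 0, 0, 1) */
	printf("hw=0x%x sw=0x%x\n",
	       pack_hw(0, 1, 1, 1),	/* -> 0xe */
	       pack_sw(1, 0, 0, 1));	/* -> 0x6 */
	return 0;
}
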
AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + 
AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), +}; + static const struct soc15_reg_golden golden_settings_gc_9_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), @@ -271,6 +655,18 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; +static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), +}; + static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = { mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, @@ -310,19 +706,21 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if); static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - if (!amdgpu_virt_support_skip_setting(adev)) { - soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); - } + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -340,6 +738,11 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_0_vg20, ARRAY_SIZE(golden_settings_gc_9_0_vg20)); break; + case CHIP_ARCTURUS: + 
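The golden-settings tables above are (register, and_mask, or_value) triples that soc15_program_register_sequence() walks at init time. A standalone sketch of the per-entry update, assuming the usual clear-then-set semantics (with a plain write when the mask covers the whole register); the struct name is a simplified stand-in for soc15_reg_golden:

#include <stdint.h>

struct golden_reg {
	uint32_t and_mask;     /* bits the entry owns */
	uint32_t or_value;     /* value to set within those bits */
};

static uint32_t apply_golden(uint32_t cur, const struct golden_reg *g)
{
	if (g->and_mask == 0xffffffffu)   /* whole register: direct write */
		return g->or_value;
	return (cur & ~g->and_mask) | g->or_value;   /* clear, then set */
}

/* e.g. the Arcturus mmTA_CNTL_AUX entry above:
 * apply_golden(cur, &(struct golden_reg){ 0xfffffeef, 0x10b0000 }) */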
soc15_program_register_sequence(adev, + golden_settings_gc_9_4_1_arct, + ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_gc_9_1, ARRAY_SIZE(golden_settings_gc_9_1)); @@ -356,8 +759,9 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) break; } - soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, - (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); + if (adev->asic_type != CHIP_ARCTURUS) + soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, + (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); } static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) @@ -610,44 +1014,14 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) } } -static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, + const char *chip_name) { - const char *chip_name; char fw_name[30]; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; - const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; - uint16_t version_major; - uint16_t version_minor; - uint32_t smu_version; - - DRM_DEBUG("\n"); - - switch (adev->asic_type) { - case CHIP_VEGA10: - chip_name = "vega10"; - break; - case CHIP_VEGA12: - chip_name = "vega12"; - break; - case CHIP_VEGA20: - chip_name = "vega20"; - break; - case CHIP_RAVEN: - if (adev->rev_id >= 8) - chip_name = "raven2"; - else if (adev->pdev->device == 0x15d8) - chip_name = "picasso"; - else - chip_name = "raven"; - break; - default: - BUG(); - } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); @@ -682,6 +1056,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const 
struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; + uint32_t smu_version; + /* * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin * instead of picasso_rlc.bin. @@ -756,57 +1182,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) if (adev->gfx.rlc.is_rlc_v2_1) gfx_v9_0_init_rlc_ext_microcode(adev); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.mec_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); - if (!err) { - err = amdgpu_ucode_validate(adev->gfx.mec2_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); - } else { - err = 0; - adev->gfx.mec2_fw = NULL; - } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; @@ -836,7 +1212,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); } + } +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + + snprintf(fw_name, 
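The fw_size accounting in these loaders rounds every ucode image up to a page boundary before adding it to the running total. The ALIGN() used there is the standard power-of-two round-up; in isolation:

#include <stdint.h>

/* power-of-two round-up, as ALIGN(size, PAGE_SIZE) does above */
#define ALIGN_UP(x, a)  ((((uint64_t)(x)) + ((a) - 1)) & ~((uint64_t)(a) - 1))

/* e.g. ALIGN_UP(0x1234, 4096) == 0x2000 */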
sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -859,13 +1286,18 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - } + /* TODO: Determine if MEC2 JT FW loading can be removed + for all GFX V9 asic and above */ + if (adev->asic_type != CHIP_ARCTURUS) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + } + } } out: @@ -875,14 +1307,6 @@ out: dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->gfx.pfp_fw); - adev->gfx.pfp_fw = NULL; - release_firmware(adev->gfx.me_fw); - adev->gfx.me_fw = NULL; - release_firmware(adev->gfx.ce_fw); - adev->gfx.ce_fw = NULL; - release_firmware(adev->gfx.rlc_fw); - adev->gfx.rlc_fw = NULL; release_firmware(adev->gfx.mec_fw); adev->gfx.mec_fw = NULL; release_firmware(adev->gfx.mec2_fw); @@ -891,6 +1315,56 @@ out: return err; } +static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + int r; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + case CHIP_VEGA12: + chip_name = "vega12"; + break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; + case CHIP_RAVEN: + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + default: + BUG(); + } + + /* No CPG in Arcturus */ + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); + if (r) + return r; + } + + r = gfx_v9_0_init_rlc_microcode(adev, chip_name); + if (r) + return r; + + r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); + if (r) + return r; + + return r; +} + static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) { u32 count = 0; @@ -1324,7 +1798,9 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .read_wave_data = &gfx_v9_0_read_wave_data, .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, - .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q + .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .ras_error_inject = &gfx_v9_0_ras_error_inject, + .query_ras_error_count = &gfx_v9_0_query_ras_error_count }; static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -1377,6 +1853,16 @@ static int 
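The refactor above splits one monolithic loader into per-block helpers (CP gfx, RLC, CP compute) that all share the same request/validate/release shape, letting the new dispatcher skip the CP gfx loader entirely on Arcturus. A condensed sketch of that shape; load_one_ucode is a hypothetical name, while the three firmware calls are the in-tree ones used throughout the hunks above:

static int load_one_ucode(struct amdgpu_device *adev, const char *chip,
			  const char *block, const struct firmware **fw)
{
	char fw_name[30];
	int err;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%s.bin", chip, block);
	err = request_firmware(fw, fw_name, adev->dev);
	if (err)
		return err;

	err = amdgpu_ucode_validate(*fw);
	if (err) {
		/* on failure the caller must see a NULL handle */
		release_firmware(*fw);
		*fw = NULL;
	}
	return err;
}

/* the MEC2 case above additionally tolerates a missing file:
 * a request_firmware() failure there is reset to err = 0 */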
gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) else gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; + case CHIP_ARCTURUS: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22014042; + break; default: BUG(); break; @@ -1653,6 +2139,7 @@ static int gfx_v9_0_sw_init(void *handle) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: adev->gfx.mec.num_mec = 2; break; default: @@ -1929,6 +2416,24 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) } } +static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); + } +} + static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1945,7 +2450,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ if (i == 0) { @@ -1973,6 +2478,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v9_0_init_compute_vmid(adev); + gfx_v9_0_init_gds_vmid(adev); } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -2849,6 +3355,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se1 = 0xffffffff; mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_static_thread_mgmt_se4 = 0xffffffff; + mqd->compute_static_thread_mgmt_se5 = 0xffffffff; + mqd->compute_static_thread_mgmt_se6 = 0xffffffff; + mqd->compute_static_thread_mgmt_se7 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; mqd->dynamic_cu_mask_addr_lo = @@ -3252,10 +3762,12 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) gfx_v9_0_enable_gui_idle_interrupt(adev, false); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - /* legacy firmware loading */ - r = gfx_v9_0_cp_gfx_load_microcode(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + /* legacy firmware loading */ + r = gfx_v9_0_cp_gfx_load_microcode(adev); + if (r) + return r; + } r = gfx_v9_0_cp_compute_load_microcode(adev); if (r) @@ -3266,18 +3778,22 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - r = gfx_v9_0_cp_gfx_resume(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_cp_gfx_resume(adev); + if (r) + return r; + } r = gfx_v9_0_kcq_resume(adev); if (r) return r; - ring = &adev->gfx.gfx_ring[0]; - r = 
amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -3291,7 +3807,8 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { - gfx_v9_0_cp_gfx_enable(adev, enable); + if (adev->asic_type != CHIP_ARCTURUS) + gfx_v9_0_cp_gfx_enable(adev, enable); gfx_v9_0_cp_compute_enable(adev, enable); } @@ -3300,7 +3817,8 @@ static int gfx_v9_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v9_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + gfx_v9_0_init_golden_registers(adev); gfx_v9_0_constants_init(adev); @@ -3316,9 +3834,11 @@ static int gfx_v9_0_hw_init(void *handle) if (r) return r; - r = gfx_v9_0_ngg_en(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_ngg_en(adev); + if (r) + return r; + } return r; } @@ -3466,8 +3986,9 @@ static int gfx_v9_0_soft_reset(void *handle) /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); - /* Disable GFX parsing/prefetching */ - gfx_v9_0_cp_gfx_enable(adev, false); + if (adev->asic_type != CHIP_ARCTURUS) + /* Disable GFX parsing/prefetching */ + gfx_v9_0_cp_gfx_enable(adev, false); /* Disable MEC parsing/prefetching */ gfx_v9_0_cp_compute_enable(adev, false); @@ -3810,7 +4331,10 @@ static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + if (adev->asic_type == CHIP_ARCTURUS) + adev->gfx.num_gfx_rings = 0; + else + adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); @@ -3821,6 +4345,7 @@ static int gfx_v9_0_early_init(void *handle) } static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry); static int gfx_v9_0_ecc_late_init(void *handle) @@ -4330,14 +4855,16 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; - /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + if (adev->asic_type != CHIP_ARCTURUS) { + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; - /* AMD_CG_SUPPORT_GFX_3D_CGLS */ - if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + } } static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) @@ -5133,12 +5660,420 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, } static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { /* TODO ue will trigger an interrupt. 
*/ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, err_data); amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; + return AMDGPU_RAS_SUCCESS; +} + +static const struct { + const char *name; + uint32_t ip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t per_se_instance; + int32_t num_instance; + uint32_t sec_count_mask; + uint32_t ded_count_mask; +} gfx_ras_edc_regs[] = { + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, + REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, + REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), + REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, + { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), + REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, + { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, + { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), + REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, + { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), + REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, + { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, + { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, + { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, + { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, + { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, + { "GDS_OA_PHY_PHY_CMD_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, + { "GDS_OA_PHY_PHY_DATA_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, + { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, + { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, + { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, + { 
"GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, + REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, + { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, + REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, + REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, + { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, + { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, + { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), + 0 }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, 
mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, + { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), + 0 }, + { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, + REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, + { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, + { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, + { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, 
DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), + 0 }, + { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_DIRTY_BIT_RAM", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, + { 
"SQC_INST_BANKB_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), + 0 }, + { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_DIRTY_BIT_RAM", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, + { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, + { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, + { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, + { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, + { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, + { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, + { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, + { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, + { "MAM_D3MEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, +}; + +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if) +{ + struct ras_inject_if *info = (struct ras_inject_if *)inject_if; + int ret; + struct ta_ras_trigger_error_input block_info = { 0 }; + + if (adev->asic_type != CHIP_VEGA20) + return -EINVAL; + + if (!ras_gfx_subblocks[info->head.sub_block_index].name) + return -EPERM; + + if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); + block_info.sub_block_index = + ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; + block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); + block_info.address = info->address; + block_info.value = info->value; + + mutex_lock(&adev->grbm_idx_mutex); + ret = psp_ras_trigger_error(&adev->psp, &block_info); + mutex_unlock(&adev->grbm_idx_mutex); + + return ret; +} + +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count, ded_count; + uint32_t i; + uint32_t reg_value; + uint32_t se_id, instance_id; + + if (adev->asic_type != CHIP_VEGA20) + return -EINVAL; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + mutex_lock(&adev->grbm_idx_mutex); + for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { + for (instance_id = 0; instance_id < 256; instance_id++) { + for (i = 0; + i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); + i++) { + if (se_id != 0 && + !gfx_ras_edc_regs[i].per_se_instance) + continue; + if (instance_id >= gfx_ras_edc_regs[i].num_instance) + continue; + + gfx_v9_0_select_se_sh(adev, se_id, 0, + instance_id); + + reg_value = RREG32( + adev->reg_offset[gfx_ras_edc_regs[i].ip] + [gfx_ras_edc_regs[i].inst] + [gfx_ras_edc_regs[i].seg] + + gfx_ras_edc_regs[i].reg_offset); + sec_count = reg_value & + gfx_ras_edc_regs[i].sec_count_mask; + ded_count = reg_value & + gfx_ras_edc_regs[i].ded_count_mask; + if (sec_count) { + DRM_INFO( + "Instance[%d][%d]: SubBlock %s, SEC %d\n", + se_id, instance_id, + gfx_ras_edc_regs[i].name, + sec_count); + err_data->ce_count++; + } + + if (ded_count) { + DRM_INFO( + "Instance[%d][%d]: SubBlock %s, DED %d\n", + se_id, instance_id, + gfx_ras_edc_regs[i].name, + ded_count); + err_data->ue_count++; + } + } + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; } static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, @@ -5183,7 +6118,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -5234,7 +6169,7 @@ static const struct amdgpu_ring_funcs 
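The query loop above walks gfx_ras_edc_regs per shader engine and instance, selects each instance with gfx_v9_0_select_se_sh(), reads the EDC counter, and tests the masked SEC/DED fields for a nonzero value, which is all that detection needs. If the numeric count itself is wanted, the masked field also has to be shifted down to bit 0; a standalone helper sketching that:

#include <stdint.h>
#include <strings.h>          /* ffs() */

/* extract a counter field: mask it out, then shift it to bit 0 */
static inline uint32_t edc_field_count(uint32_t reg, uint32_t mask)
{
	if (!mask)            /* entries with no DED field pass mask 0 */
		return 0;
	return (reg & mask) >> (ffs(mask) - 1);
}

/* e.g. for a DED field occupying bits 15:8,
 * edc_field_count(reg, 0x0000ff00) yields counts 0..255 */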
gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5269,7 +6204,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5349,6 +6284,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; default: @@ -5366,6 +6302,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: + case CHIP_ARCTURUS: adev->gds.gds_size = 0x1000; break; default: @@ -5387,6 +6324,9 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) else adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ break; + case CHIP_ARCTURUS: + adev->gds.gds_compute_max_wave_id = 0xfff; + break; default: /* this really depends on the chip */ adev->gds.gds_compute_max_wave_id = 0x7ff; @@ -5431,12 +6371,21 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - unsigned disable_masks[4 * 2]; + unsigned disable_masks[4 * 4]; if (!adev || !cu_info) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + /* + * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs + */ + if (adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se > 16) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -5445,11 +6394,23 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, ao_bitmap = 0; counter = 0; gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); - if (i < 4 && j < 2) - gfx_v9_0_set_user_cu_inactive_bitmap( - adev, disable_masks[i * 2 + j]); + gfx_v9_0_set_user_cu_inactive_bitmap( + adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + + /* + * The bitmap(and ao_cu_bitmap) in cu_info structure is + * 4x4 size array, and it's usually suitable for Vega + * ASICs which has 4*2 SE/SH layout. + * But for Arcturus, SE/SH layout is changed to 8*1. 
+ * To mostly reduce the impact, we make it compatible + * with current bitmap array as below: + * SE4,SH0 --> bitmap[0][1] + * SE5,SH0 --> bitmap[1][1] + * SE6,SH0 --> bitmap[2][1] + * SE7,SH0 --> bitmap[3][1] + */ + cu_info->bitmap[i % 4][j + i / 4] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { @@ -5462,7 +6423,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, active_cu_number += counter; if (i < 2 && j < 2) ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i][j] = ao_bitmap; + cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 15986748f59f..6ce37ce77d14 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -357,7 +357,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index d605b4963f8a..8ce5bf5feb45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -333,7 +333,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, void gfxhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5eeb72fcc123..f585fc92871b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -62,7 +62,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_vmhub *hub; u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i; - bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -70,7 +70,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; - bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -81,39 +81,39 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - hub = &adev->vmhub[AMDGPU_MMHUB]; + hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp &= ~bits[AMDGPU_MMHUB]; + tmp &= ~bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); } /* GFX HUB */ - hub = 
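The comment above describes folding an 8*1 SE/SH layout into the existing 4x4 cu_info arrays via bitmap[i % 4][j + i / 4]. A small standalone program that prints the resulting mapping:

#include <stdio.h>

int main(void)
{
	const int max_se = 8, max_sh = 1;   /* Arcturus-style layout */

	for (int i = 0; i < max_se; i++)
		for (int j = 0; j < max_sh; j++)
			printf("SE%d,SH%d -> bitmap[%d][%d]\n",
			       i, j, i % 4, j + i / 4);
	return 0;
}

/* SE0..SE3 land in column 0 and SE4..SE7 in column 1, matching the
 * SE4,SH0 -> bitmap[0][1] ... SE7,SH0 -> bitmap[3][1] examples above */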
&adev->vmhub[AMDGPU_GFXHUB]; + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp &= ~bits[AMDGPU_GFXHUB]; + tmp &= ~bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); } break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - hub = &adev->vmhub[AMDGPU_MMHUB]; + hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp |= bits[AMDGPU_MMHUB]; + tmp |= bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); } /* GFX HUB */ - hub = &adev->vmhub[AMDGPU_GFXHUB]; + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp |= bits[AMDGPU_GFXHUB]; + tmp |= bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); } break; @@ -244,11 +244,11 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0); + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || adev->in_gpu_reset) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0); + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } @@ -313,7 +313,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - if (ring->funcs->vmhub == AMDGPU_GFXHUB) + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -524,6 +524,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) if (amdgpu_gart_size == -1) { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -601,9 +603,12 @@ static int gmc_v10_0_sw_init(void *handle) adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->num_vmhubs = 2; /* * To fulfill 4-level page support, - * vm size is 256TB (48bit), maximum size of Navi10, + * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12, * block size 512 (9bit) */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -680,8 +685,8 @@ static int gmc_v10_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; amdgpu_vm_manager_init(adev); @@ -717,6 +722,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 73f3b79ab131..0c77b9f244bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -48,6 +48,8 @@ #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_1.h" +#include "mmhub_v9_4.h" +#include "umc_v6_1.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -241,11 +243,23 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, } static int gmc_v9_0_process_ras_data_cb(struct 
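gmc_v9_0_process_interrupt() below rebuilds the faulting page address from two dwords of the IV entry: src_data[0] supplies address bits 43:12 and the low nibble of src_data[1] supplies bits 47:44. The same shifts, in isolation:

#include <stdint.h>

/* reassemble a 48-bit fault address from the two IV-ring dwords,
 * per the shifts used in the handler below */
static inline uint64_t decode_fault_addr(uint32_t src_data0,
					 uint32_t src_data1)
{
	uint64_t addr = (uint64_t)src_data0 << 12;   /* page number, low part */

	addr |= ((uint64_t)src_data1 & 0xf) << 44;   /* top 4 address bits */
	return addr;
}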
amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; + if (adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, err_data); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.funcs->query_ras_error_address) + adev->umc.funcs->query_ras_error_address(adev, err_data); + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) + amdgpu_ras_reset_gpu(adev, 0); + + return AMDGPU_RAS_SUCCESS; } static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, @@ -284,7 +298,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -295,7 +309,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, } break; case AMDGPU_IRQ_STATE_ENABLE: - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -315,10 +329,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + struct amdgpu_vmhub *hub; bool retry_fault = !!(entry->src_data[1] & 0x80); uint32_t status = 0; u64 addr; + char hub_name[10]; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -327,6 +342,17 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, entry->timestamp)) return 1; /* This also prevents sending it to KFD */ + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { + snprintf(hub_name, sizeof(hub_name), "mmhub0"); + hub = &adev->vmhub[AMDGPU_MMHUB_0]; + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { + snprintf(hub_name, sizeof(hub_name), "mmhub1"); + hub = &adev->vmhub[AMDGPU_MMHUB_1]; + } else { + snprintf(hub_name, sizeof(hub_name), "gfxhub0"); + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + } + /* If it's the first fault for this address, process it normally */ if (!amdgpu_sriov_vf(adev)) { status = RREG32(hub->vm_l2_pro_fault_status); @@ -342,17 +368,30 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, "[%s] %s page fault (src_id:%u ring:%u vmid:%u " "pasid:%u, for process %s pid %d thread %s pid %d)\n", - entry->vmid_src ? "mmhub" : "gfxhub", - retry_fault ? "retry" : "no-retry", + hub_name, retry_fault ? 
"retry" : "no-retry", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n", + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + + } } return 0; @@ -419,7 +458,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, const unsigned eng = 17; unsigned i, j; - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + for (i = 0; i < adev->num_vmhubs; ++i) { struct amdgpu_vmhub *hub = &adev->vmhub[i]; u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); @@ -480,7 +519,11 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, struct amdgpu_device *adev = ring->adev; uint32_t reg; - if (ring->funcs->vmhub == AMDGPU_GFXHUB) + /* Do nothing because there's no lut register for mmhub1. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_1) + return; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -597,12 +640,29 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } +static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; + adev->umc.funcs = &umc_v6_1_funcs; + break; + default: + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); + gmc_v9_0_set_umc_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -629,6 +689,7 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_RAVEN: + case CHIP_ARCTURUS: return true; case CHIP_VEGA12: case CHIP_VEGA20: @@ -641,7 +702,8 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) { struct amdgpu_ring *ring; unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = - {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP}; + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, + GFXHUB_FREE_VM_INV_ENGS_BITMAP}; unsigned i; unsigned vmhub, inv_eng; @@ -689,6 +751,7 @@ static int gmc_v9_0_ecc_late_init(void *handle) amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); return 0; } + /* handle resume path. */ if (*ras_if) { /* resend ras TA enable cmd during resume. 
@@ -806,8 +869,12 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) { u64 base = 0; - if (!amdgpu_sriov_vf(adev)) - base = mmhub_v1_0_get_fb_location(adev); + if (!amdgpu_sriov_vf(adev)) { + if (adev->asic_type == CHIP_ARCTURUS) + base = mmhub_v9_4_get_fb_location(adev); + else + base = mmhub_v1_0_get_fb_location(adev); + } /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; amdgpu_gmc_vram_location(adev, mc, base); @@ -887,6 +954,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) case CHIP_VEGA10: /* all engines support GPUVM */ case CHIP_VEGA12: /* all engines support GPUVM */ case CHIP_VEGA20: + case CHIP_ARCTURUS: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -923,7 +991,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { - u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); + u32 d1vga_control; unsigned size; /* @@ -933,6 +1001,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) if (gmc_v9_0_keep_stolen_memory(adev)) return 9 * 1024 * 1024; + d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ } else { @@ -972,13 +1041,18 @@ static int gmc_v9_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v1_0_init(adev); - mmhub_v1_0_init(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_init(adev); + else + mmhub_v1_0_init(adev); spin_lock_init(&adev->gmc.invalidate_lock); adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch (adev->asic_type) { case CHIP_RAVEN: + adev->num_vmhubs = 2; + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { @@ -991,6 +1065,8 @@ static int gmc_v9_0_sw_init(void *handle) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: + adev->num_vmhubs = 2; + /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -1002,6 +1078,12 @@ static int gmc_v9_0_sw_init(void *handle) else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; + case CHIP_ARCTURUS: + adev->num_vmhubs = 3; + + /* Keep the vm size same with Vega20 */ + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; default: break; } @@ -1012,6 +1094,13 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + if (adev->asic_type == CHIP_ARCTURUS) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + } + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -1077,8 +1166,9 @@ static int gmc_v9_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; amdgpu_vm_manager_init(adev); @@ -1123,7 +1213,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: - if 
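/*
 * Worked example for the XGMI offset arithmetic in
 * gmc_v9_0_vram_gtt_location() above, with assumed sample values (the
 * segment size and node id are illustrative): each node in an XGMI hive
 * owns one fixed-size segment of the shared physical address space, so
 * node N places its FB at N * node_segment_size.
 *
 *	// base from MC_VM_FB_LOCATION_BASE = 0x0
 *	// node_segment_size = 32ULL << 30   (32 GiB, assumed)
 *	// physical_node_id  = 2
 *	// base += 2 * (32ULL << 30)  ->  0x1000000000 (64 GiB)
 */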
(amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) break; /* fall through */ case CHIP_VEGA20: @@ -1181,7 +1271,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - r = mmhub_v1_0_gart_enable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + r = mmhub_v9_4_gart_enable(adev); + else + r = mmhub_v1_0_gart_enable(adev); if (r) return r; @@ -1202,7 +1295,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) value = true; gfxhub_v1_0_set_fault_enable_default(adev, value); - mmhub_v1_0_set_fault_enable_default(adev, value); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_set_fault_enable_default(adev, value); + else + mmhub_v1_0_set_fault_enable_default(adev, value); gmc_v9_0_flush_gpu_tlb(adev, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", @@ -1243,7 +1339,10 @@ static int gmc_v9_0_hw_init(void *handle) static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) { gfxhub_v1_0_gart_disable(adev); - mmhub_v1_0_gart_disable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_gart_disable(adev); + else + mmhub_v1_0_gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } @@ -1308,6 +1407,9 @@ static int gmc_v9_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) + return 0; + return mmhub_v1_0_set_clockgating(adev, state); } @@ -1315,6 +1417,9 @@ static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) + return; + mmhub_v1_0_get_clockgating(adev, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index 5c8deac65580..971c0840358f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -37,4 +37,11 @@ extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; +/* amdgpu_amdkfd*.c */ +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t value); +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t value); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, + uint32_t vmid, uint64_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index dc5ce03034d3..292f3b1cddf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -111,7 +111,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; /* Set default page address. 
*/ @@ -159,7 +159,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; /* Setup L2 cache */ @@ -208,7 +208,7 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) { - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, @@ -348,7 +348,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - if (!amdgpu_virt_support_skip_setting(adev)) { + if (!amdgpu_sriov_vf(adev)) { /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); @@ -367,7 +367,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); @@ -407,7 +407,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) void mmhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 0f9549f19ade..d2f4775299c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -324,7 +324,7 @@ void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) void mmhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, @@ -406,6 +406,7 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: mmhub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); mmhub_v2_0_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c new file mode 100644 index 000000000000..33b0de54a5da --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -0,0 +1,517 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "mmhub_v9_4.h" + +#include "mmhub/mmhub_9_4_1_offset.h" +#include "mmhub/mmhub_9_4_1_sh_mask.h" +#include "mmhub/mmhub_9_4_1_default.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +#define MMHUB_NUM_INSTANCES 2 +#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000 + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) +{ + /* The base should be the same between the two mmhubs on Arcturus. Read one here. */ + u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE); + u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP); + + base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK; + top <<= 24; + + adev->gmc.fb_start = base; + adev->gmc.fb_end = top; + + return base; +} + +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, + uint32_t vmid, uint64_t value) +{ + /* the register distance from mmVML2VC0_VM_CONTEXT0_* to + * mmVML2VC0_VM_CONTEXT1_* + */ + int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + lower_32_bits(value)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + upper_32_bits(value)); + +} + +static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_end >> 24); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_start >> 24); + + /* Program the system aperture low logical page number.
*/ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ATC_EN, 1); + + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_L2_CACHE, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, + int hubid) +{ + uint32_t tmp; + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, + int hubid) +{ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +} + +static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + int i; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * 
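/*
 * Index arithmetic used by mmhub_v9_4_setup_vmid_config() here, spelled
 * out (it follows from the register layout, not from new behaviour):
 * the per-context CNTL registers are one dword apart, so context i adds
 * +i, while the START/END address registers come as LO32/HI32 pairs, so
 * context i adds +i*2.
 *
 *	// CNTL of ctx i on hub h:   mmVML2VC0_VM_CONTEXT1_CNTL + h*0x3000 + i
 *	// START_ADDR_LO32 of ctx i: base_reg + h*0x3000 + i*2
 */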
MMHUB_INSTANCE_REGISTER_OFFSET + i); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, + tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, + int hubid) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0x1f); + } +} + +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + if (amdgpu_sriov_vf(adev)) { + /* + * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because + * they are VF copy registers that the vbios post + * doesn't program, so the SRIOV driver needs to + * program them itself + */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE, + i * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.vram_start >> 24); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP, + i * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.vram_end >> 24); + } + + /* GART Enable.
*/ + mmhub_v9_4_init_gart_aperture_regs(adev, i); + mmhub_v9_4_init_system_aperture_regs(adev, i); + mmhub_v9_4_init_tlb_regs(adev, i); + mmhub_v9_4_init_cache_regs(adev, i); + + mmhub_v9_4_enable_system_domain(adev, i); + mmhub_v9_4_disable_identity_aperture(adev, i); + mmhub_v9_4_setup_vmid_config(adev, i); + mmhub_v9_4_program_invalidation(adev, i); + } + + return 0; +} + +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i, j; + + for (j = 0; j < MMHUB_NUM_INSTANCES; j++) { + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET + + i, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_CACHE, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, + j * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + } +} + +/** + * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + if (!value) { + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, + VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, + i * 
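/*
 * The addressing convention used throughout this file, as a short
 * worked example: both MMHUB instances expose identical register blocks
 * at a fixed stride of MMHUB_INSTANCE_REGISTER_OFFSET (0x3000 dwords),
 * so every access selects its hub with
 * "base + hubid * MMHUB_INSTANCE_REGISTER_OFFSET".
 *
 *	// hub 0: mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL + 0 * 0x3000
 *	// hub 1: mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL + 1 * 0x3000
 */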
MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + } +} + +void mmhub_v9_4_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = + {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + hub[i]->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ACK) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h new file mode 100644 index 000000000000..9ba3dd808826 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -0,0 +1,33 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __MMHUB_V9_4_H__ +#define __MMHUB_V9_4_H__ + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev); +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev); +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev); +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void mmhub_v9_4_init(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 235548c0b41f..cc5bf595f9b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -449,20 +449,6 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } -static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) -{ - adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; - - /* Enable L1 security reg access mode by defaul, as non-security VF - * will no longer be supported. - */ - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; - - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; - - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; -} - const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, @@ -471,5 +457,4 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .trans_msg = xgpu_ai_mailbox_trans_msg, .get_pp_clk = xgpu_ai_get_pp_clk, .force_dpm_level = xgpu_ai_force_dpm_level, - .init_reg_access_mode = xgpu_ai_init_reg_access_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index e963746be11c..9fe08408db58 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -21,7 +21,8 @@ * */ -#include <drm/drmP.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ih.h" diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c index 55014ce8670a..a56c93620e78 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c @@ -29,20 +29,8 @@ int navi10_reg_base_init(struct amdgpu_device *adev) { - int r, i; + int i; - if (amdgpu_discovery) { - r = amdgpu_discovery_reg_base_init(adev); - if (r) { - DRM_WARN("failed to init reg base from ip discovery table, " - "fallback to legacy init method\n"); - goto legacy_init; - } - - return 0; - } - -legacy_init: for (i = 0 ; i < MAX_INSTANCE ; ++i) { adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c new file mode 100644 index 000000000000..cadc7603ca41 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "navi12_ip_offset.h" + +int navi12_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks; only initialize the blocks needed by the driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c new file mode 100644 index 000000000000..3b5f0f65e096 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c @@ -0,0 +1,54 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "navi14_ip_offset.h" + +int navi14_reg_base_init(struct amdgpu_device *adev) +{ + int i; + + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 835d7b1a841f..c05d78d4efc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -92,7 +92,7 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan } static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, - int doorbell_index) + int doorbell_index, int instance) { u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index bfaaa327ae3c..910fffced43b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -31,6 +31,25 @@ #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +/* + * These are the nbio v7_4_1 register masks. Define them here temporarily + * since the nbio v7_4_1 header is incomplete.
+ */ +#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L + +#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc +#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 +//BIF_MMSCH1_DOORBELL_RANGE +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT 0x2 +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT 0x10 +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L + static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -75,10 +94,24 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + u32 reg, doorbell_range; - u32 doorbell_range = RREG32(reg); + if (instance < 2) + reg = instance + + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); + else + /* + * The register addresses of SDMA2~7 are not consecutive + * with those of SDMA0~1; an extra 4-dword offset is needed. + * + * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0 + * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4 + * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8 + */ + reg = instance + 0x4 + + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); + + doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -89,6 +122,32 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan WREG32(reg, doorbell_range); } +static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg; + u32 doorbell_range; + + if (instance) + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE); + else + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { @@ -220,6 +279,12 @@ static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK, .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, + .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK, + .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, + .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, + .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, + .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, + .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, }; static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) @@ -261,6 +326,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .hdp_flush = nbio_v7_4_hdp_flush, .get_memsize = nbio_v7_4_get_memsize,
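/*
 * Worked example for the SDMA doorbell range lookup above, using the
 * dword indexing of SOC15_REG_OFFSET() and the register addresses
 * quoted in the comment (0x3bc0 for BIF_SDMA0_DOORBELL_RANGE):
 *
 *	// instance 0: base + 0     -> 0x3bc0
 *	// instance 1: base + 1     -> 0x3bc4
 *	// instance 2: base + 2 + 4 -> 0x3bd8 (skips the 4-dword gap)
 *	// instance 7: base + 7 + 4 -> 0x3bec
 */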
.sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range, .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 662612f89c70..3e67536f0dc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -23,7 +23,8 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include <drm/drmP.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -289,6 +290,18 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) return ret; } + +static enum amd_reset_method +nv_asic_reset_method(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (smu_baco_is_support(smu)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; +} + static int nv_asic_reset(struct amdgpu_device *adev) { @@ -303,7 +316,7 @@ static int nv_asic_reset(struct amdgpu_device *adev) int ret = 0; struct smu_context *smu = &adev->smu; - if (smu_baco_is_support(smu)) + if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ret = smu_baco_reset(smu); else ret = nv_asic_mode1_reset(adev); @@ -363,23 +376,55 @@ static const struct amdgpu_ip_block_version nv_common_ip_block = .funcs = &nv_common_ip_funcs, }; -int nv_set_ip_blocks(struct amdgpu_device *adev) +static int nv_reg_base_init(struct amdgpu_device *adev) { - /* Set IP register base before any HW register access */ + int r; + + if (amdgpu_discovery) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + DRM_WARN("failed to init reg base from ip discovery table, " + "fallback to legacy init method\n"); + goto legacy_init; + } + + return 0; + } + +legacy_init: switch (adev->asic_type) { case CHIP_NAVI10: navi10_reg_base_init(adev); break; + case CHIP_NAVI14: + navi14_reg_base_init(adev); + break; + case CHIP_NAVI12: + navi12_reg_base_init(adev); + break; default: return -EINVAL; } + return 0; +} + +int nv_set_ip_blocks(struct amdgpu_device *adev) +{ + int r; + + /* Set IP register base before any HW register access */ + r = nv_reg_base_init(adev); + if (r) + return r; + adev->nbio_funcs = &nbio_v2_3_funcs; adev->nbio_funcs->detect_hw_virt(adev); switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: amdgpu_device_ip_block_add(adev, &nv_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); @@ -402,6 +447,25 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; + case CHIP_NAVI12: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + if (adev->firmware.load_type == 
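/*
 * A condensed sketch of the reset path introduced above (locals
 * simplified): method selection is now queried separately from the
 * reset itself, so callers can ask which method would be used before
 * committing to one.
 *
 *	enum amd_reset_method m = nv_asic_reset_method(adev);
 *
 *	if (m == AMD_RESET_METHOD_BACO)
 *		ret = smu_baco_reset(smu);		// BACO when the SMU supports it
 *	else
 *		ret = nv_asic_mode1_reset(adev);	// otherwise PSP mode1
 */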
AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + break; default: return -EINVAL; } @@ -496,6 +560,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .read_bios_from_rom = &nv_read_bios_from_rom, .read_register = &nv_read_register, .reset = &nv_asic_reset, + .reset_method = &nv_asic_reset_method, .set_vga_state = &nv_vga_set_state, .get_xclk = &nv_get_xclk, .set_uvd_clocks = &nv_set_uvd_clocks, @@ -556,6 +621,30 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_ATHUB; adev->external_rev_id = adev->rev_id + 0x1; break; + case CHIP_NAVI14: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 20; + break; + case CHIP_NAVI12: + adev->cg_flags = 0; + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 0xa; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -748,6 +837,8 @@ static int nv_common_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); adev->nbio_funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 639c54933cc5..82e6cb432f3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -30,4 +30,6 @@ void nv_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int nv_set_ip_blocks(struct amdgpu_device *adev); int navi10_reg_base_init(struct amdgpu_device *adev); +int navi14_reg_base_init(struct amdgpu_device *adev); +int navi12_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 5080a73a95a5..74a9fe8e0cfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -233,8 +233,15 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */ GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */ GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */ - GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */ - GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */ + GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_DMUB = 51, /* DMUB RN */ + GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */ + GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */ + GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */ + GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */ + GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */ + GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 41b72588adcf..f0a0ecb07818 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -43,6 +43,12 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); 
MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -60,6 +66,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) int err = 0; const struct psp_firmware_header_v1_0 *sos_hdr; const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; + const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_0 *asd_hdr; const struct ta_firmware_header_v1_0 *ta_hdr; @@ -72,6 +79,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; default: BUG(); } @@ -107,6 +123,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes); } + if (sos_hdr->header.header_version_minor == 2) { + sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; + adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes); + adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); + } break; default: dev_err(adev->dev, @@ -158,6 +180,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) } break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + case CHIP_ARCTURUS: break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 019c47feee42..c2ebc0020e5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -636,7 +636,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) { - if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) + if (amdgpu_sriov_vf(psp->adev)) return true; return false; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4428018672d3..c04259182614 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -34,6 +34,18 @@ #include "sdma0/sdma0_4_2_sh_mask.h" #include "sdma1/sdma1_4_2_offset.h" #include "sdma1/sdma1_4_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" #include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" @@ -55,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); MODULE_FIRMWARE("amdgpu/picasso_sdma.bin"); MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -202,25 +215,120 @@ static 
const struct soc15_reg_golden golden_settings_sdma_rv2[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001) }; +static const struct soc15_reg_golden golden_settings_sdma_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002) +}; + static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) { - return ( 0 == instance ? 
(adev->reg_offset[SDMA0_HWIP][0][0] + offset) : - (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); + switch (instance) { + case 0: + return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); + case 1: + return (adev->reg_offset[SDMA1_HWIP][0][0] + offset); + case 2: + return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); + case 3: + return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); + case 4: + return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); + case 5: + return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); + case 6: + return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); + case 7: + return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); + default: + break; + } + return 0; +} + +static unsigned sdma_v4_0_seq_to_irq_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SOC15_IH_CLIENTID_SDMA0; + case 1: + return SOC15_IH_CLIENTID_SDMA1; + case 2: + return SOC15_IH_CLIENTID_SDMA2; + case 3: + return SOC15_IH_CLIENTID_SDMA3; + case 4: + return SOC15_IH_CLIENTID_SDMA4; + case 5: + return SOC15_IH_CLIENTID_SDMA5; + case 6: + return SOC15_IH_CLIENTID_SDMA6; + case 7: + return SOC15_IH_CLIENTID_SDMA7; + default: + break; + } + return -EINVAL; +} + +static int sdma_v4_0_irq_id_to_seq(unsigned client_id) +{ + switch (client_id) { + case SOC15_IH_CLIENTID_SDMA0: + return 0; + case SOC15_IH_CLIENTID_SDMA1: + return 1; + case SOC15_IH_CLIENTID_SDMA2: + return 2; + case SOC15_IH_CLIENTID_SDMA3: + return 3; + case SOC15_IH_CLIENTID_SDMA4: + return 4; + case SOC15_IH_CLIENTID_SDMA5: + return 5; + case SOC15_IH_CLIENTID_SDMA6: + return 6; + case SOC15_IH_CLIENTID_SDMA7: + return 7; + default: + break; + } + return -EINVAL; } static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - if (!amdgpu_virt_support_skip_setting(adev)) { - soc15_program_register_sequence(adev, - golden_settings_sdma_4, - ARRAY_SIZE(golden_settings_sdma_4)); - soc15_program_register_sequence(adev, - golden_settings_sdma_vg10, - ARRAY_SIZE(golden_settings_sdma_vg10)); - } + soc15_program_register_sequence(adev, + golden_settings_sdma_4, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, + golden_settings_sdma_vg10, + ARRAY_SIZE(golden_settings_sdma_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -241,6 +349,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma1_4_2, ARRAY_SIZE(golden_settings_sdma1_4_2)); break; + case CHIP_ARCTURUS: + soc15_program_register_sequence(adev, + golden_settings_sdma_arct, + ARRAY_SIZE(golden_settings_sdma_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_sdma_4_1, @@ -259,6 +372,43 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) } } +static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ + int err = 0; + const struct sdma_firmware_header_v1_0 *hdr; + + err = amdgpu_ucode_validate(sdma_inst->fw); + if (err) + return err; + + hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + + if (sdma_inst->feature_version >= 20) + sdma_inst->burst_nop = true; + + return 0; +} + +static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.instance[i].fw != NULL) + release_firmware(adev->sdma.instance[i].fw); + + /* arcturus 
shares the same FW memory across + all SDMA instances */ + if (adev->asic_type == CHIP_ARCTURUS) + break; + } + + memset((void*)adev->sdma.instance, 0, + sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} + /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -278,7 +428,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - const struct sdma_firmware_header_v1_0 *hdr; DRM_DEBUG("\n"); @@ -300,30 +449,49 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) else chip_name = "raven"; break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; default: BUG(); } - for (i = 0; i < adev->sdma.num_instances; i++) { - if (i == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); - else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); - if (err) - goto out; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - if (adev->sdma.instance[i].feature_version >= 20) - adev->sdma.instance[i].burst_nop = true; - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + + err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); + if (err) + goto out; + + for (i = 1; i < adev->sdma.num_instances; i++) { + if (adev->asic_type == CHIP_ARCTURUS) { + /* Arcturus will leverage the same FW memory + for every SDMA instance */ + memcpy((void*)&adev->sdma.instance[i], + (void*)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); + } else { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); + + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); + if (err) + goto out; + } + } + + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
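Because the Arcturus path copies adev->sdma.instance[0] (including its struct firmware pointer) into every other instance, sdma_v4_0_destroy_inst_ctx() above must release that firmware exactly once, hence the early break in its loop. A minimal userspace sketch of the same share-once, free-once rule (names are illustrative, not driver API):

#include <stdlib.h>
#include <string.h>

#define NUM_INST 8

struct inst_ctx {
        void *fw;               /* aliased across instances when shared */
        int burst_nop;
};

static struct inst_ctx inst[NUM_INST];

static void destroy_all(int fw_is_shared)
{
        int i;

        for (i = 0; i < NUM_INST; i++) {
                free(inst[i].fw);
                if (fw_is_shared)
                        break;  /* every inst[i].fw is the same pointer: free once */
        }
        memset(inst, 0, sizeof(inst)); /* drop the now-dangling aliases too */
}

int main(void)
{
        int i;

        inst[0].fw = malloc(16);
        for (i = 1; i < NUM_INST; i++)
                inst[i] = inst[0];      /* share, like the memcpy() above */
        destroy_all(1);
        return 0;
}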
"true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + for (i = 0; i < adev->sdma.num_instances; i++) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -332,13 +500,11 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); } } + out: if (err) { DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); } return err; } @@ -561,10 +727,7 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; - if (ring->me == 0) - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; - else - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; sdma_v4_0_wait_reg_mem(ring, 0, 1, adev->nbio_funcs->get_hdp_flush_done_offset(adev), @@ -620,26 +783,27 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; - int i; + int i, unset = 0; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].ring; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) + if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = 1; + } - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -663,16 +827,20 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + bool unset = false; for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].page; + + if ((adev->mman.buffer_funcs_ring == sdma[i]) && + (unset == false)) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = true; + } + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -681,10 +849,9 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -1475,6 +1642,8 @@ static int 
sdma_v4_0_early_init(void *handle) if (adev->asic_type == CHIP_RAVEN) adev->sdma.num_instances = 1; + else if (adev->asic_type == CHIP_ARCTURUS) + adev->sdma.num_instances = 8; else adev->sdma.num_instances = 2; @@ -1499,6 +1668,7 @@ static int sdma_v4_0_early_init(void *handle) } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry); static int sdma_v4_0_late_init(void *handle) @@ -1518,7 +1688,7 @@ static int sdma_v4_0_late_init(void *handle) .sub_block_index = 0, .name = "sdma", }; - int r; + int r, i; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); @@ -1575,14 +1745,11 @@ static int sdma_v4_0_late_init(void *handle) if (r) goto sysfs; resume: - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - if (r) - goto irq; - - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); - if (r) { - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - goto irq; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (r) + goto irq; } return 0; @@ -1606,28 +1773,22 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA SRAM ECC event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC, - &adev->sdma.ecc_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + } /* SDMA SRAM ECC event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC, - &adev->sdma.ecc_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_SRAM_ECC, + &adev->sdma.ecc_irq); + if (r) + return r; + } for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; @@ -1641,11 +1802,8 @@ static int sdma_v4_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; sprintf(ring->name, "sdma%d", i); - r = amdgpu_ring_init(adev, ring, 1024, - &adev->sdma.trap_irq, - (i == 0) ? - AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; @@ -1663,9 +1821,7 @@ static int sdma_v4_0_sw_init(void *handle) sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, - (i == 0) ? 
- AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; } @@ -1701,10 +1857,7 @@ static int sdma_v4_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); return 0; } @@ -1718,7 +1871,8 @@ static int sdma_v4_0_hw_init(void *handle) adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); - sdma_v4_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + sdma_v4_0_init_golden_registers(adev); r = sdma_v4_0_start(adev); @@ -1728,12 +1882,15 @@ static int sdma_v4_0_hw_init(void *handle) static int sdma_v4_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; if (amdgpu_sriov_vf(adev)) return 0; - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); @@ -1776,15 +1933,17 @@ static bool sdma_v4_0_is_idle(void *handle) static int sdma_v4_0_wait_for_idle(void *handle) { - unsigned i; - u32 sdma0, sdma1; + unsigned i, j; + u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); - sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); - - if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + for (j = 0; j < adev->sdma.num_instances; j++) { + sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG); + if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK)) + break; + } + if (j == adev->sdma.num_instances) return 0; udelay(1); } @@ -1820,17 +1979,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, uint32_t instance; DRM_DEBUG("IH: SDMA trap\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: - return 0; - } - + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -1851,20 +2000,15 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { - uint32_t instance, err_source; + uint32_t err_source; + int instance; - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) return 0; - } switch (entry->src_id) { case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: @@ -1881,7 +2025,7 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; + return AMDGPU_RAS_SUCCESS; } static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, @@ -1910,16 +2054,9 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, DRM_ERROR("Illegal instruction in SDMA command stream\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case 
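The reworked sdma_v4_0_wait_for_idle() above polls all instances once per microsecond and succeeds only when a full inner pass finds no busy engine; the j == num_instances test works because the inner loop breaks at the first busy one. The same pattern in self-contained form (stubbed status read, illustrative names):

#include <stdbool.h>

#define USEC_TIMEOUT 100000

static int fake_busy_polls = 3; /* stub: engines report idle after 3 polls */

static bool engine_is_idle(int inst)
{
        (void)inst;
        return fake_busy_polls <= 0;
}

static int wait_all_idle(int num_inst)
{
        int t, j;

        for (t = 0; t < USEC_TIMEOUT; t++) {
                for (j = 0; j < num_inst; j++)
                        if (!engine_is_idle(j))
                                break;          /* engine j still busy */
                if (j == num_inst)
                        return 0;               /* full pass: all idle */
                fake_busy_polls--;              /* stands in for udelay(1) */
        }
        return -1;                              /* -ETIMEDOUT in the driver */
}

int main(void)
{
        return wait_all_idle(8);
}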
SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) return 0; - } switch (entry->ring_id) { case 0: @@ -1936,14 +2073,10 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev, { u32 sdma_edc_config; - u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? - sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) : - sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG); - - sdma_edc_config = RREG32(reg_offset); + sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG); sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(reg_offset, sdma_edc_config); + WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config); return 0; } @@ -2133,7 +2266,43 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_ring_get_wptr, + .set_wptr = sdma_v4_0_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/* + * On Arcturus, SDMA instances 5~7 have a different vmhub type (AMDGPU_MMHUB_1). + * So create an individual constant ring_funcs for those instances.
+ */ +static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -2165,7 +2334,39 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_page_ring_get_wptr, + .set_wptr = sdma_v4_0_page_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, .set_wptr = sdma_v4_0_page_ring_set_wptr, @@ -2197,10 +2398,20 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->sdma.num_instances; i++) { - adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; if (adev->sdma.has_page_queue) { - adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs; adev->sdma.instance[i].page.me = i; } } @@ -2224,10 +2435,23 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = { static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + switch (adev->sdma.num_instances) { + case 1: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + break; + case 8: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + break; + case 2: + default: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + break; + } adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs 
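Because an amdgpu_ring_funcs table is const, the differing .vmhub for Arcturus SDMA instances 5~7 cannot be patched per ring at init time; sdma_v4_0_set_ring_funcs() above instead picks between two otherwise-identical tables. A reduced sketch of that selection rule (the struct and enum here are illustrative):

#include <stdio.h>

enum vmhub { MMHUB_0, MMHUB_1 };

struct ring_funcs { enum vmhub vmhub; };

static const struct ring_funcs funcs_hub0 = { MMHUB_0 };
static const struct ring_funcs funcs_hub1 = { MMHUB_1 };

/* mirrors sdma_v4_0_set_ring_funcs(): instances 5..7 sit behind MMHUB_1 */
static const struct ring_funcs *pick_funcs(int is_arcturus, int i)
{
        return (is_arcturus && i >= 5) ? &funcs_hub1 : &funcs_hub0;
}

int main(void)
{
        int i;

        for (i = 0; i < 8; i++)
                printf("sdma%d -> hub %d\n", i, pick_funcs(1, i)->vmhub);
        return 0;
}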
= &sdma_v4_0_illegal_inst_irq_funcs; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 3747c3f1f0cc..3e180152c5ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -21,8 +21,11 @@ * */ +#include <linux/delay.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -42,6 +45,12 @@ MODULE_FIRMWARE("amdgpu/navi10_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); + #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 #define SDMA0_HYP_DEC_REG_END 0x5893 @@ -59,7 +68,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -71,7 +80,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -80,6 +89,18 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = { }; static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), }; static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) @@ -111,6 +132,22 @@ static void 
sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_nv10, (const u32)ARRAY_SIZE(golden_settings_sdma_nv10)); break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv14, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv12, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); + break; default: break; } @@ -143,6 +180,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; default: BUG(); } @@ -861,7 +904,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (amdgpu_emu_mode == 1) msleep(1); else - DRM_UDELAY(1); + udelay(1); } if (i < adev->usec_timeout) { @@ -1316,7 +1359,7 @@ static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) if (ring->trail_seq == le32_to_cpu(*(ring->trail_fence_cpu_addr))) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) { @@ -1472,6 +1515,7 @@ static int sdma_v5_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: sdma_v5_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); sdma_v5_0_update_medium_grain_light_sleep(adev, @@ -1532,7 +1576,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_0_ring_get_rptr, .get_wptr = sdma_v5_0_ring_get_wptr, .set_wptr = sdma_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 4d74453f3cfb..f09930a416ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1186,6 +1186,12 @@ static int si_asic_reset(struct amdgpu_device *adev) return 0; } +static enum amd_reset_method +si_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 si_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1394,6 +1400,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .read_bios_from_rom = &si_read_bios_from_rom, .read_register = &si_read_register, .reset = &si_asic_reset, + .reset_method = &si_asic_reset_method, .set_vga_state = &si_vga_set_state, .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 23265414d448..5116d0bf9e4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -63,6 +63,7 @@ #include "uvd_v7_0.h" #include "vce_v4_0.h" #include "vcn_v1_0.h" +#include "vcn_v2_5.h" #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" @@ -115,6 +116,49 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u64 r; + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = 
adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* read low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + + /* read high 32 bit*/ + WREG32(address, reg + 4); + (void)RREG32(address); + r |= ((u64)RREG32(data) << 32); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* write low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, (u32)(v & 0xffffffffULL)); + (void)RREG32(data); + + /* write high 32 bit */ + WREG32(address, reg + 4); + (void)RREG32(address); + WREG32(data, (u32)(v >> 32)); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -464,12 +508,14 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return 0; } -static int soc15_asic_reset(struct amdgpu_device *adev) +static enum amd_reset_method +soc15_asic_reset_method(struct amdgpu_device *adev) { - int ret; bool baco_reset; switch (adev->asic_type) { + case CHIP_RAVEN: + return AMD_RESET_METHOD_MODE2; case CHIP_VEGA10: case CHIP_VEGA12: soc15_asic_get_baco_capability(adev, &baco_reset); @@ -493,6 +539,16 @@ static int soc15_asic_reset(struct amdgpu_device *adev) } if (baco_reset) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; +} + +static int soc15_asic_reset(struct amdgpu_device *adev) +{ + int ret; + + if (soc15_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ret = soc15_asic_baco_reset(adev); else ret = soc15_asic_mode1_reset(adev); @@ -586,21 +642,25 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA20: vega20_reg_base_init(adev); break; + case CHIP_ARCTURUS: + arct_reg_base_init(adev); + break; default: return -EINVAL; } - if (adev->asic_type == CHIP_VEGA20) + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; - else if (adev->asic_type == CHIP_VEGA20) + else if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) adev->nbio_funcs = &nbio_v7_4_funcs; else adev->nbio_funcs = &nbio_v6_1_funcs; - if (adev->asic_type == CHIP_VEGA20) + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->df_funcs = &df_v3_6_funcs; else adev->df_funcs = &df_v1_7_funcs; @@ -672,6 +732,17 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #endif amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; + case CHIP_ARCTURUS: + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + break; default: return -EINVAL; } @@ -688,7 +759,7 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if 
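soc15_pcie_rreg64()/soc15_pcie_wreg64() above widen the classic index/data indirect access to 64 bits with two 32-bit cycles; the pair is not atomic by itself, which is why both halves happen under pcie_idx_lock, and the dummy read-back of the index register after each write flushes the posted write before the data port is touched. The low/high split itself is just the following (standalone sketch with a fake data port, no MMIO):

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_data_port[2];      /* stands in for [reg] and [reg + 4] */

static void wreg64(uint64_t v)
{
        fake_data_port[0] = (uint32_t)(v & 0xffffffffULL);      /* low dword */
        fake_data_port[1] = (uint32_t)(v >> 32);                /* high dword */
}

static uint64_t rreg64(void)
{
        uint64_t r = fake_data_port[0];                 /* low dword first */

        r |= (uint64_t)fake_data_port[1] << 32;         /* then high dword */
        return r;
}

int main(void)
{
        wreg64(0x1122334455667788ULL);
        printf("0x%016llx\n", (unsigned long long)rreg64()); /* round-trips */
        return 0;
}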
(!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); else amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); @@ -714,14 +785,9 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, /* Set the 2 events that we wish to watch, defined above */ /* Reg 40 is # received msgs */ + /* Reg 104 is # of posted requests sent */ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); - /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */ - if (adev->asic_type == CHIP_VEGA20) - perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, - EVENT1_SEL, 108); - else - perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, - EVENT1_SEL, 104); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); /* Write to enable desired perf counters */ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); @@ -751,6 +817,55 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs */ + /* Reg 108 is # of posted requests sent on VG20 */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT1_SEL, 108); + + /* Write to enable desired perf counters */ + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32); + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32); +} + static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; @@ -792,6 +907,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .read_bios_from_rom = &soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, + .reset_method = &soc15_asic_reset_method, .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, @@ -821,9 +937,10 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, - .get_pcie_usage = &soc15_get_pcie_usage, + .get_pcie_usage = &vega20_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, 
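vega20_get_pcie_usage() above follows the perf-counter recipe used elsewhere in this file: arm with 0x5 (start + global reset), sample for a second, latch with 0x2 (stop + load shadow), then widen each 32-bit shadow with its COUNTERn_UPPER overflow field. The widening step as a standalone check:

#include <stdint.h>
#include <stdio.h>

/* combine a 32-bit counter shadow with its overflow (upper) field */
static uint64_t widen(uint32_t shadow_lo, uint32_t overflow_hi)
{
        return (uint64_t)shadow_lo | ((uint64_t)overflow_hi << 32);
}

int main(void)
{
        /* counter wrapped once: overflow field = 1, shadow = 0x10 */
        printf("%llu\n", (unsigned long long)widen(0x10, 1)); /* 4294967312 */
        return 0;
}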
.get_pcie_replay_count = &soc15_get_pcie_replay_count, + .reset_method = &soc15_asic_reset_method }; static int soc15_common_early_init(void *handle) @@ -837,6 +954,8 @@ static int soc15_common_early_init(void *handle) adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; adev->pcie_wreg = &soc15_pcie_wreg; + adev->pcie_rreg64 = &soc15_pcie_rreg64; + adev->pcie_wreg64 = &soc15_pcie_wreg64; adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; adev->didt_rreg = &soc15_didt_rreg; @@ -998,6 +1117,12 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_RLC_SMU_HS; break; + case CHIP_ARCTURUS: + adev->asic_funcs = &vega20_asic_funcs; + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x32; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1043,21 +1168,18 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) int i; struct amdgpu_ring *ring; - /* Two reasons to skip - * 1, Host driver already programmed them - * 2, To avoid registers program violations in SR-IOV - */ - if (!amdgpu_virt_support_skip_setting(adev)) { + /* sdma/ih doorbell range are programed by hypervisor */ + if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, adev->doorbell_index.sdma_doorbell_range); } - } - adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); + } } static int soc15_common_hw_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 7a6b2cc6d9f5..a3dde0c31f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -77,6 +77,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); +int arct_reg_base_init(struct amdgpu_device *adev); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 47f74dab365d..839f186e1182 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,9 +69,10 @@ } \ } while (0) +#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a))) #define WREG32_RLC(reg, value) \ do { \ - if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ uint32_t i = 0; \ uint32_t retries = 50000; \ uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ @@ -96,7 +97,7 @@ #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ do { \ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ - if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c new file mode 
100644 index 000000000000..8502e736f721 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -0,0 +1,255 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v6_1.h" +#include "amdgpu_ras.h" +#include "amdgpu.h" + +#include "rsmu/rsmu_0_0_2_offset.h" +#include "rsmu/rsmu_0_0_2_sh_mask.h" +#include "umc/umc_6_1_1_offset.h" +#include "umc/umc_6_1_1_sh_mask.h" + +#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 + +/* + * (addr / 256) * 8192, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +/* channel index is the index of 256B block */ +#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) +/* offset in 256B block */ +#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) + +const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { + {2, 18, 11, 27}, {4, 20, 13, 29}, + {1, 17, 8, 24}, {7, 23, 14, 30}, + {10, 26, 3, 19}, {12, 28, 5, 21}, + {9, 25, 0, 16}, {15, 31, 6, 22} +}; + +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, + uint32_t umc_instance) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_INSTANCE, umc_instance); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_WREN, 1 << umc_instance); + WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + rsmu_umc_index); +} + +static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 0); +} + +static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* select the lower 
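umc_v6_1_enable_umc_index_mode() above steers subsequent UMCCH register accesses to one UMC instance through a single RSMU window register: INSTANCE carries the binary index while WREN takes a one-hot write-enable bit for the same instance. A sketch of that field packing (the shifts below are invented for illustration, not the real RSMU layout):

#include <stdint.h>
#include <stdio.h>

/* invented field layout, for illustration only */
#define MODE_EN_SHIFT   0
#define WREN_SHIFT      1       /* 8 one-hot write-enable bits */
#define INSTANCE_SHIFT  9       /* 3-bit binary instance index */

static uint32_t pack_umc_index(unsigned inst)
{
        uint32_t v = 0;

        v |= 1u << MODE_EN_SHIFT;               /* RSMU_UMC_INDEX_MODE_EN = 1 */
        v |= (1u << inst) << WREN_SHIFT;        /* RSMU_UMC_INDEX_WREN = 1 << inst */
        v |= inst << INSTANCE_SHIFT;            /* RSMU_UMC_INDEX_INSTANCE = inst */
        return v;
}

int main(void)
{
        printf("umc3 -> 0x%08x\n", pack_umc_index(3)); /* 0x00000611 here */
        return 0;
}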
chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the lower chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the higher chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* check for SRAM correctable error + MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* check the MCUMC_STATUS */ + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v6_1_query_error_count(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint32_t umc_reg_offset, + uint32_t channel_index) +{ + umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, + &(err_data->ce_count)); + umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, + &(err_data->ue_count)); +} + +static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_for_each_channel(umc_v6_1_query_error_count); +} + +static void umc_v6_1_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, uint32_t channel_index) +{ + uint32_t lsb, mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* skip error address process if -ENOMEM */ + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); + return; + } + + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, 
UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + + /* the lowest lsb bits should be ignored */ + lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + err_addr &= ~((0x1ULL << lsb) - 1); + + /* translate umc channel address to soc pa, 3 parts are included */ + err_data->err_addr[err_data->err_addr_cnt] = + ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + err_data->err_addr_cnt++; + } + + /* clear umc status */ + WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); +} + +static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); +} + +static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, uint32_t channel_index) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrInt, 0x1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); +} + +static void umc_v6_1_ras_init(struct amdgpu_device *adev) +{ + void *ras_error_status = NULL; + + amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel); +} + +const struct amdgpu_umc_funcs umc_v6_1_funcs = { + .ras_init = umc_v6_1_ras_init, + .query_ras_error_count = umc_v6_1_query_ras_error_count, + .query_ras_error_address = umc_v6_1_query_ras_error_address, + .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, + .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h new file mode 100644 index 000000000000..dab9cbd292c5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -0,0 +1,51 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
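The translation above rebuilds a SoC physical address from three parts: ADDR_OF_8KB_BLOCK turns the 256-byte-granular ErrorAddr into an 8KB-block base (the "(addr / 256) * 8192" of the macro comment), ADDR_OF_256B_BLOCK drops the channel's 256B slot into it, and OFFSET_IN_256B_BLOCK keeps the byte offset. A worked standalone check of that math, reusing the same macros:

#include <stdint.h>
#include <stdio.h>

#define ADDR_OF_8KB_BLOCK(addr)         (((addr) & ~0xffULL) << 5)
#define ADDR_OF_256B_BLOCK(ch)          ((uint64_t)(ch) << 8)
#define OFFSET_IN_256B_BLOCK(addr)      ((addr) & 0xffULL)

int main(void)
{
        uint64_t err_addr = 0x12345;    /* example UMC-local error address */
        uint32_t channel_index = 27;    /* as looked up in umc_v6_1_channel_idx_tbl */
        uint64_t pa;

        pa = ADDR_OF_8KB_BLOCK(err_addr) |
             ADDR_OF_256B_BLOCK(channel_index) |
             OFFSET_IN_256B_BLOCK(err_addr);
        printf("soc pa = 0x%llx\n", (unsigned long long)pa); /* 0x247b45 */
        return 0;
}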
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V6_1_H__ +#define __UMC_V6_1_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* HBM Memory Channel Width */ +#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128 +/* number of umc channel instance with memory map register access */ +#define UMC_V6_1_CHANNEL_INSTANCE_NUM 4 +/* number of umc instance with memory map register access */ +#define UMC_V6_1_UMC_INSTANCE_NUM 8 +/* total channel instances in one umc block */ +#define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM) +/* UMC register per channel offset */ +#define UMC_V6_1_PER_CHANNEL_OFFSET 0x800 + +/* EccErrCnt max value */ +#define UMC_V6_1_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V6_1_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) + +extern const struct amdgpu_umc_funcs umc_v6_1_funcs; +extern const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index a6bfe7651d07..01f658fa72c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1763,7 +1763,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, @@ -1796,7 +1796,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index eafbe8d8248d..683701cf7270 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1070,7 +1070,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index dde22b7d140d..93b3500e522b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -63,6 +63,7 @@ static int vcn_v1_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->vcn.num_vcn_inst = 1; adev->vcn.num_enc_rings = 2; vcn_v1_0_set_dec_ring_funcs(adev); @@ -87,20 +88,21 @@ static int vcn_v1_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ - r
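UMC_V6_1_CE_CNT_INIT in the header above encodes the usual counter-preset trick: EccErrCnt is seeded with MAX - THRESHOLD so the hardware raises its correctable-error interrupt after THRESHOLD further errors, and the query path in umc_v6_1.c recovers the real count by subtracting the seed (with both constants 0xffff the seed is simply 0). The arithmetic, standalone:

#include <stdint.h>
#include <stdio.h>

#define CE_CNT_MAX              0xffff
#define CE_INT_THRESHOLD        0xffff
#define CE_CNT_INIT             (CE_CNT_MAX - CE_INT_THRESHOLD)

int main(void)
{
        uint32_t hw_cnt = CE_CNT_INIT + 5;      /* 5 new correctable errors */

        /* what umc_v6_1_query_correctable_error_count() accumulates */
        printf("new errors = %u\n", hw_cnt - CE_CNT_INIT);      /* 5 */
        return 0;
}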
= amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq); if (r) return r; /* VCN ENC TRAP */ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; } /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq); if (r) return r; @@ -122,39 +124,39 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; - adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 = + adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = adev->vcn.external.data0 = + adev->vcn.internal.data0 = adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = adev->vcn.external.data1 = + adev->vcn.internal.data1 = adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = adev->vcn.external.cmd = + adev->vcn.internal.cmd = adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = adev->vcn.external.nop = + adev->vcn.internal.nop = adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; - adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch = + adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); return 0; @@ -191,7 +193,7 @@ static int vcn_v1_0_sw_fini(void *handle) static int vcn_v1_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; r = amdgpu_ring_test_helper(ring); @@ -199,14 +201,14 @@ static int vcn_v1_0_hw_init(void *handle) goto done; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->sched.ready = true; r = amdgpu_ring_test_helper(ring); if (r) goto done; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; r = amdgpu_ring_test_helper(ring); if (r) goto done; @@ -229,7 +231,7 @@ done: static int vcn_v1_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; if 
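The mechanical ring_dec -> inst->ring_dec rewrite running through this file is the single-instance end of a refactor that moves all per-VCN state behind an instance array, with vcn_v1_0 declaring num_vcn_inst = 1 so inst (pointer decay to the first array slot) is the only instance. A reduced sketch of the shape of that change (field names are illustrative):

#include <stdio.h>

struct ring { const char *name; };

struct vcn_inst {
        struct ring ring_dec;
        struct ring ring_enc[2];
};

struct vcn {
        int num_vcn_inst;
        struct vcn_inst inst[2];        /* MAX_VCN_INSTANCES-style bound */
};

int main(void)
{
        struct vcn vcn = { .num_vcn_inst = 1 };

        /* old code: vcn.ring_dec; new code: first slot of the inst array,
         * which "vcn.inst->ring_dec" reaches via array-to-pointer decay */
        vcn.inst[0].ring_dec.name = "vcn_dec";
        printf("%s\n", vcn.inst[0].ring_dec.name);
        return 0;
}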
((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || RREG32_SOC15(VCN, 0, mmUVD_STATUS)) @@ -304,9 +306,9 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr)); + lower_32_bits(adev->vcn.inst->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr)); + upper_32_bits(adev->vcn.inst->gpu_addr)); offset = size; WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -316,17 +318,17 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); @@ -374,9 +376,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); offset = size; WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); @@ -386,9 +388,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, @@ -396,10 +398,10 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /* cache window 2: context */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, 
mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, @@ -779,7 +781,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) */ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; int i, j, r; @@ -932,21 +934,21 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); @@ -968,7 +970,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1106,7 +1108,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); /* initialize JPEG wptr */ - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); /* copy patch commands to the jpeg ring */ @@ -1255,21 +1257,21 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1315,7 +1317,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = 
&adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | @@ -1329,7 +1331,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1596,7 +1598,7 @@ static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1613,7 +1615,7 @@ static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); @@ -1630,7 +1632,7 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else @@ -2114,16 +2116,16 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->vcn.ring_dec); + amdgpu_fence_process(&adev->vcn.inst->ring_dec); break; case 119: - amdgpu_fence_process(&adev->vcn.ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); break; case 120: - amdgpu_fence_process(&adev->vcn.ring_enc[1]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; case 126: - amdgpu_fence_process(&adev->vcn.ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2198,7 +2200,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, @@ -2232,7 +2234,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, .set_wptr = vcn_v1_0_enc_ring_set_wptr, @@ -2264,7 +2266,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .extra_dw = 64, .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, @@ -2295,7 +2297,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; + adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; DRM_INFO("VCN decode is enabled in VM mode\n"); } @@ -2304,14 +2306,14 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < 
adev->vcn.num_enc_rings; ++i) - adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; + adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; DRM_INFO("VCN encode is enabled in VM mode\n"); } static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; + adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); } @@ -2322,8 +2324,8 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs; } const struct amdgpu_ip_block_version vcn_v1_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 1cfc2620b2dd..36ad0c0e8efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_vcn.h" #include "soc15.h" @@ -92,6 +92,7 @@ static int vcn_v2_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->vcn.num_vcn_inst = 1; adev->vcn.num_enc_rings = 2; vcn_v2_0_set_dec_ring_funcs(adev); @@ -118,7 +119,7 @@ static int vcn_v2_0_sw_init(void *handle) /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; @@ -126,15 +127,14 @@ static int vcn_v2_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; } /* VCN JPEG TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_2_0__SRCID__JPEG_DECODE, - &adev->vcn.irq); + VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq); if (r) return r; @@ -156,49 +156,56 @@ static int vcn_v2_0_sw_init(void *handle) if (r) return r; - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; - adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); + adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; - adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; - 
adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; - adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; - adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); + adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); + adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); return 0; } @@ -234,11 +241,11 @@ static int vcn_v2_0_sw_fini(void *handle) static int vcn_v2_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ring->doorbell_index); + ring->doorbell_index, 0); ring->sched.ready = true; r = amdgpu_ring_test_ring(ring); @@ -248,7 +255,7 @@ static int vcn_v2_0_hw_init(void *handle) } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->sched.ready = true; r = amdgpu_ring_test_ring(ring); if (r) { @@ -257,7 +264,7 @@ static int vcn_v2_0_hw_init(void *handle) } } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->sched.ready = true; r = amdgpu_ring_test_ring(ring); if (r) { @@ -283,7 +290,7 @@ done: static int vcn_v2_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i; if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || @@ -294,11 +301,11 @@ static int vcn_v2_0_hw_fini(void *handle) ring->sched.ready = false; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->sched.ready = false; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->sched.ready = false; return 0; @@ -368,9 +375,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) offset = 0; } else { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr)); + lower_32_bits(adev->vcn.inst->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr)); + upper_32_bits(adev->vcn.inst->gpu_addr)); offset = 
size; WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -380,17 +387,17 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); @@ -426,10 +433,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec } else { WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), @@ -447,10 +454,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec if (!indirect) { WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { @@ -467,10 +474,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* cache window 2: context */ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( @@ -658,7 +665,7 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, */ static int jpeg_v2_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_jpeg; + 
struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg; uint32_t tmp; int r = 0; @@ -920,7 +927,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; vcn_v2_0_enable_static_power_gating(adev); @@ -1046,7 +1053,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) static int vcn_v2_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; int i, j, r; @@ -1197,14 +1204,14 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -1351,14 +1358,14 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); @@ -1480,12 +1487,14 @@ static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring) * * Write a start command to the ring. */ -static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); } /** @@ -1495,10 +1504,12 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) * * Write a end command to the ring. 
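
A note on the rewrite of the dec-ring emitters just above: instead of hard-coding the version-specific mmUVD_*_INTERNAL_OFFSET constants, the helpers now read the offsets from adev->vcn.internal, which each VCN revision fills in during sw_init; that indirection is what allows vcn_v2_5 to reuse these functions unchanged. Every write is a PACKET0 header naming an internal UVD register followed by one payload dword. A minimal sketch of the pattern (the helper name vcn_dec_emit_pair is invented for illustration; PACKET0 and amdgpu_ring_write are the real macros/functions used above):

	/* Emit one (register, value) pair on the VCN decode ring. */
	static void vcn_dec_emit_pair(struct amdgpu_ring *ring,
				      uint32_t internal_reg, uint32_t val)
	{
		amdgpu_ring_write(ring, PACKET0(internal_reg, 0));
		amdgpu_ring_write(ring, val);
	}

The VCN_DEC_KMD_CMD bit OR'd into each command word appears to tag packets issued by the kernel driver, distinguishing them from user-submitted command streams.
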
*/ -static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1)); } /** @@ -1508,14 +1519,15 @@ static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) * * Write a nop command to the ring. */ -static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { + struct amdgpu_device *adev = ring->adev; int i; WARN_ON(ring->wptr % 2 || count % 2); for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0)); amdgpu_ring_write(ring, 0); } } @@ -1528,32 +1540,33 @@ static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun * * Write a fence and a trap command to the ring. */ -static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) +void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) { - WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + struct amdgpu_device *adev = ring->adev; - amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0)); + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0)); amdgpu_ring_write(ring, seq); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, addr & 0xffffffff); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1)); } /** @@ -1564,44 +1577,46 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 * * Write ring commands to execute the indirect buffer */ -static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { + struct amdgpu_device *adev = ring->adev; unsigned vmid = AMDGPU_JOB_GET_VMID(job); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0)); + 
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0)); amdgpu_ring_write(ring, vmid); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0)); amdgpu_ring_write(ring, ib->length_dw); } -static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0)); amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1)); } -static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) +void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t data0, data1, mask; @@ -1615,18 +1630,20 @@ static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); } -static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1)); } /** @@ -1640,7 +1657,7 @@ static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1657,7 +1674,7 @@ static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = 
ring->adev; - if (ring == &adev->vcn.ring_enc[0]) { + if (ring == &adev->vcn.inst->ring_enc[0]) { if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else @@ -1681,7 +1698,7 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) { + if (ring == &adev->vcn.inst->ring_enc[0]) { if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); @@ -1706,8 +1723,8 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) * * Write an enc fence and a trap command to the ring. */ -static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, - u64 seq, unsigned flags) +void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1718,7 +1735,7 @@ static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP); } -static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, VCN_ENC_CMD_END); } @@ -1731,10 +1748,10 @@ static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) * * Write enc ring commands to execute the indirect buffer */ -static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1745,9 +1762,8 @@ static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } -static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, reg << 2); @@ -1755,8 +1771,8 @@ static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) +void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; @@ -1767,8 +1783,7 @@ static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, lower_32_bits(pd_addr), 0xffffffff); } -static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, reg << 2); @@ -1832,7 +1847,7 @@ static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) * * Write a start command to the ring. */ -static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1850,7 +1865,7 @@ static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) * * Write an end command to the ring.
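
These enc- and jpeg-ring emitters are un-static'd here because the new vcn_v2_5 code reuses them verbatim; only the declarations added to vcn_v2_0.h below change. A trimmed sketch of how a later VCN revision can wire them into its own ring-funcs table (the field subset and the MMHUB choice are illustrative assumptions, not the full upstream table):

	static const struct amdgpu_ring_funcs vcn_v2_5_enc_sketch = {
		.type = AMDGPU_RING_TYPE_VCN_ENC,
		.align_mask = 0x3f,
		.nop = VCN_ENC_CMD_NO_OP,
		.vmhub = AMDGPU_MMHUB_1,	/* assumed: second MMHUB instance */
		.emit_ib = vcn_v2_0_enc_ring_emit_ib,
		.emit_fence = vcn_v2_0_enc_ring_emit_fence,
		.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
		.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
		.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
		.insert_end = vcn_v2_0_enc_ring_insert_end,
	};
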
*/ -static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1869,8 +1884,8 @@ static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) * * Write a fence and a trap command to the ring. */ -static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) +void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1918,10 +1933,10 @@ static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6 * * Write ring commands to execute the indirect buffer. */ -static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1969,9 +1984,8 @@ static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0x2); } -static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { uint32_t reg_offset = (reg << 2); @@ -1997,8 +2011,8 @@ static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_write(ring, mask); } -static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) +void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t data0, data1, mask; @@ -2012,8 +2026,7 @@ static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); } -static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { uint32_t reg_offset = (reg << 2); @@ -2031,7 +2044,7 @@ static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) { int i; @@ -2059,16 +2072,16 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: - amdgpu_fence_process(&adev->vcn.ring_dec); + amdgpu_fence_process(&adev->vcn.inst->ring_dec); break; case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: - amdgpu_fence_process(&adev->vcn.ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); break; case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: - amdgpu_fence_process(&adev->vcn.ring_enc[1]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->vcn.ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2079,6 +2092,36 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t 
tmp = 0; + unsigned i; + int r; + + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 4); + if (r) + return r; + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + + static int vcn_v2_0_set_powergating_state(void *handle, enum amd_powergating_state state) { @@ -2128,7 +2171,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, .set_wptr = vcn_v2_0_dec_ring_set_wptr, @@ -2142,7 +2185,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .emit_ib = vcn_v2_0_dec_ring_emit_ib, .emit_fence = vcn_v2_0_dec_ring_emit_fence, .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ring = vcn_v2_0_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, .insert_nop = vcn_v2_0_dec_ring_insert_nop, .insert_start = vcn_v2_0_dec_ring_insert_start, @@ -2159,7 +2202,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, .set_wptr = vcn_v2_0_enc_ring_set_wptr, @@ -2188,7 +2231,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_jpeg_ring_get_rptr, .get_wptr = vcn_v2_0_jpeg_ring_get_wptr, .set_wptr = vcn_v2_0_jpeg_ring_set_wptr, @@ -2217,7 +2260,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; + adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; DRM_INFO("VCN decode is enabled in VM mode\n"); } @@ -2226,14 +2269,14 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) - adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; + adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; DRM_INFO("VCN encode is enabled in VM mode\n"); } static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; + adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); } @@ -2244,8 +2287,8 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.funcs = 
&vcn_v2_0_irq_funcs; } const struct amdgpu_ip_block_version vcn_v2_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h index a74227f4663b..8467292f32e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h @@ -24,6 +24,44 @@ #ifndef __VCN_V2_0_H__ #define __VCN_V2_0_H__ +extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val); + +extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags); +extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + +extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count); + extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block; #endif /* __VCN_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c new file mode 100644 index 000000000000..395c2259f979 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -0,0 +1,1414 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
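
Context for the new vcn_v2_0_dec_ring_test_ring above: it replaces the generic amdgpu_vcn_dec_ring_test_ring helper because the scratch register must now be looked up per instance, via adev->vcn.inst[ring->me].external.scratch9. The shape of the test is the usual scratch-poisoning handshake: poison SCRATCH9 with 0xCAFEDEAD, submit a packet that stores 0xDEADBEEF there, then poll. A minimal sketch of the polling half, assuming only RREG32/udelay semantics (helper name invented for illustration):

	/* Poll a scratch register until the GPU lands the magic value
	 * written by the ring packet, or time out after usec_timeout. */
	static int vcn_poll_scratch(struct amdgpu_device *adev,
				    uint32_t reg, uint32_t magic)
	{
		unsigned i;

		for (i = 0; i < adev->usec_timeout; i++) {
			if (RREG32(reg) == magic)
				return 0;
			udelay(1);
		}
		return -ETIMEDOUT;
	}
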
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> + +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "soc15.h" +#include "soc15d.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_2_5_offset.h" +#include "vcn/vcn_2_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 +#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f +#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 +#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11 +#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29 +#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66 +#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d + +#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 +#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c + +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state); + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +/** + * vcn_v2_5_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v2_5_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) { + u32 harvest; + int i; + + adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->vcn.harvest_config |= 1 << i; + } + + if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | + AMDGPU_VCN_HARVEST_VCN1)) + /* both instances are harvested, disable the block */ + return -ENOENT; + } else + adev->vcn.num_vcn_inst = 1; + + adev->vcn.num_enc_rings = 2; + + vcn_v2_5_set_dec_ring_funcs(adev); + vcn_v2_5_set_enc_ring_funcs(adev); + vcn_v2_5_set_jpeg_ring_funcs(adev); + vcn_v2_5_set_irq_funcs(adev); + + return 0; +} + +/** + * vcn_v2_5_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v2_5_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, j, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq); + if (r) + return r; + + /* VCN ENC TRAP */ + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, 
amdgpu_ih_clientid_vcns[j], + i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); + if (r) + return r; + } + + /* VCN JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq); + if (r) + return r; + } + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } + DRM_INFO("PSP loading VCN firmware\n"); + } + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); + + adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); + + ring = &adev->vcn.inst[j].ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; + sprintf(ring->name, "vcn_dec_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; + sprintf(ring->name, "vcn_enc_%d.%d", j, i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } + + ring = &adev->vcn.inst[j].ring_jpeg; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; + sprintf(ring->name, "vcn_jpeg_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } + + return 0; +} + +/** + * vcn_v2_5_sw_fini - sw fini for VCN block + * + * 
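
The doorbell assignments in vcn_v2_5_sw_init above carve an eight-slot window per VCN instance out of the shared vcn_ring0_1 range: slot 0 is the dec ring, slot 1 the jpeg ring, and slots 2 + i the enc rings. A sketch of that arithmetic (the helper name is invented for illustration; the expression is the one used above):

	/* 8 doorbell slots per VCN instance: 0 = dec, 1 = jpeg, 2 + i = enc[i] */
	static int vcn25_doorbell_index(struct amdgpu_device *adev,
					int inst, int slot)
	{
		return (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * inst + slot;
	}
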
@handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v2_5_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v2_5_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v2_5_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j, r; + + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + ring = &adev->vcn.inst[j].ring_dec; + + adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, j); + + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + /* enc rings are not brought up yet: mark them not ready + * and skip the ring test for now */ + ring->sched.ready = false; + continue; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + } + + ring = &adev->vcn.inst[j].ring_jpeg; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + } +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully.\n"); + + return r; +} + +/** + * vcn_v2_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark the rings as no longer ready + */ +static int vcn_v2_5_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ring = &adev->vcn.inst[i].ring_dec; + + if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) + vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + /* use a separate counter so the outer instance loop is not clobbered */ + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->sched.ready = false; + } + + ring = &adev->vcn.inst[i].ring_jpeg; + ring->sched.ready = false; + } + + return 0; +} + +/** + * vcn_v2_5_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v2_5_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v2_5_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v2_5_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v2_5_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v2_5_hw_init(adev); + + return r; +} + +/** + * vcn_v2_5_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the VCN memory controller know its offsets + */ +static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + 
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = size; + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + } +} + +/** + * vcn_v2_5_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Disable clock gating for VCN block + */ +static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data; + int ret = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); + + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | 
UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +/** + * vcn_v2_5_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Enable clock gating for VCN block + */ +static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | 
UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +/** + * jpeg_v2_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t tmp; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ring = &adev->vcn.inst[i].ring_jpeg; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK + | JPEG_CGC_CTRL__JMCIF_MODE_MASK + | JPEG_CGC_CTRL__JRBBM_MODE_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + /* MJPEG global tiling registers */ + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR); + } + + return 0; +} + +/** + * jpeg_v2_5_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_5_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + } + + return 0; +} + +static int vcn_v2_5_start(struct amdgpu_device *adev) +{ 
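+ /* Bring-up order, as in vcn_v2_0: per instance, drop the register anti-hang latch and mark UVD busy, force all clocks on (vcn_v2_5_disable_clock_gating), enable the VCPU clock with the master interrupt still masked, program LMI/MPC and the cache windows (vcn_v2_5_mc_resume), then release VCPU reset and poll UVD_STATUS. */ 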
+ struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + int i, j, k, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* set uvd status busy */ + tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); + } + + /* SW clock gating */ + vcn_v2_5_disable_clock_gating(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); + tmp &= ~0xff; + WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8 | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + } + + vcn_v2_5_mc_resume(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* VCN global tiling registers */ + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU from reset */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (k = 0; k < 10; ++k) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, i, mmUVD_STATUS); + if (status & 2) + break; + if (amdgpu_emu_mode == 1) + msleep(500); + else + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; 
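+ /* up to 10 attempts above: each pass polls UVD_STATUS for what looks like the VCPU-ready report (the status & 2 test) and on failure pulses UVD_VCPU_CNTL.BLK_RST to reset the VCPU; emulation gets 500 ms per poll instead of 10 ms since the model runs far slower */ 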
+ } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0); + + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + + /* program the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + } + r = jpeg_v2_5_start(adev); + + return r; +} + +static int vcn_v2_5_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i, r; + + r = jpeg_v2_5_stop(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* wait for vcn idle */ + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + + 
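+ /* teardown mirrors vcn_v2_5_start() in reverse: wait for the idle status, let LMI read/write traffic drain, stall the UMC arbiter, block VCPU register access, assert VCPU reset and gate its clock before clock gating is re-enabled below */ 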
vcn_v2_5_enable_clock_gating(adev); + + /* enable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + } + + return 0; +} + +/** + * vcn_v2_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v2_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v2_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_dec_ring_get_rptr, + .get_wptr = vcn_v2_5_dec_ring_get_wptr, + .set_wptr = vcn_v2_5_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ + .emit_ib = vcn_v2_0_dec_ring_emit_ib, + .emit_fence = vcn_v2_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = vcn_v2_0_dec_ring_insert_nop, + .insert_start = vcn_v2_0_dec_ring_insert_start, + .insert_end = vcn_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); +} + +/** + * vcn_v2_5_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return 
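+ /* without a doorbell fall back to the MMIO register below; with one, the hardware mirrors the current wptr into the writeback page (adev->wb), so it is fetched from system memory instead */ 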
RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); + } else { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); + } +} + +/** + * vcn_v2_5_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } + } else { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + } + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_enc_ring_get_rptr, + .get_wptr = vcn_v2_5_enc_ring_get_wptr, + .set_wptr = vcn_v2_5_enc_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v2_5_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v2_5_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = { + .type = 
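+ /* like the dec/enc tables above, the JPEG ring reuses the vcn_v2_0 packet emit/insert helpers unchanged; only the rptr/wptr accessors are new here, selecting the instance through ring->me */ 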
AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_jpeg_ring_get_rptr, + .get_wptr = vcn_v2_5_jpeg_ring_get_wptr, + .set_wptr = vcn_v2_5_jpeg_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ + 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ + .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, + .emit_fence = vcn_v2_0_jpeg_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_jpeg_ring_test_ring, + .test_ib = amdgpu_vcn_jpeg_ring_test_ib, + .insert_nop = vcn_v2_0_jpeg_ring_nop, + .insert_start = vcn_v2_0_jpeg_ring_insert_start, + .insert_end = vcn_v2_0_jpeg_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.me = i; + DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + } +} + +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i, j; + + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; + adev->vcn.inst[j].ring_enc[i].me = j; + } + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); + } +} + +static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs; + adev->vcn.inst[i].ring_jpeg.me = i; + DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i); + } +} + +static bool vcn_v2_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +static int vcn_v2_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); + if (ret) + return ret; + } + + return ret; +} + +static int vcn_v2_5_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? 
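+ /* gating may only be (re)enabled while every instance reports idle; the check below bails out with -EBUSY otherwise */ 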
true : false; + + if (enable) { + /* wait for STATUS to clear */ + if (!vcn_v2_5_is_idle(handle)) + return -EBUSY; + vcn_v2_5_enable_clock_gating(adev); + } else { + vcn_v2_5_disable_clock_gating(adev); + } + + return 0; +} + +static int vcn_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v2_5_stop(adev); + else + ret = vcn_v2_5_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + + return ret; +} + +static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); + break; + case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); + break; + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { + .set = vcn_v2_5_set_interrupt_state, + .process = vcn_v2_5_process_interrupt, +}; + +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { + .name = "vcn_v2_5", + .early_init = vcn_v2_5_early_init, + .late_init = NULL, + .sw_init = vcn_v2_5_sw_init, + .sw_fini = vcn_v2_5_sw_fini, + .hw_init = vcn_v2_5_hw_init, + .hw_fini = vcn_v2_5_hw_fini, + .suspend = vcn_v2_5_suspend, + .resume = vcn_v2_5_resume, + .is_idle = vcn_v2_5_is_idle, + .wait_for_idle = vcn_v2_5_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v2_5_set_clockgating_state, + .set_powergating_state = vcn_v2_5_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v2_5_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 2, + .minor = 5, + .rev = 0, + .funcs = &vcn_v2_5_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h new file mode 100644 index 000000000000..8d9c0800b8e0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V2_5_H__ +#define __VCN_V2_5_H__ + +extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; + +#endif /* __VCN_V2_5_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 22260e6963b8..c1c0a39ae269 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -50,7 +50,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return; @@ -64,7 +64,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -80,7 +80,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -106,7 +106,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return; @@ -125,7 +125,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program 
IH_RB_CNTL_RING1 failed!\n"); @@ -145,7 +145,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -219,7 +219,7 @@ static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) static int vega10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih; - u32 ih_rb_cntl; + u32 ih_rb_cntl, ih_chicken; int ret = 0; u32 tmp; @@ -238,7 +238,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return -ETIMEDOUT; @@ -247,6 +247,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); } + if (adev->asic_type == CHIP_ARCTURUS && + adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + if (adev->irq.ih.use_bus_addr) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + ih_chicken |= 0x00000010; + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + } + } + /* set the writeback address whether it's enabled or not */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); @@ -272,7 +281,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WPTR_OVERFLOW_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -299,7 +308,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 0db84386252a..587e33f5dcce 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -50,6 +50,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); + adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); } return 0; } @@ -85,6 +87,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = 
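+ /* each VEGA20_DOORBELL64_VCNn_m index appears to pack a pair of 32-bit doorbells, covering the VCN dec/enc/jpeg rings in order */ 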
AMDGPU_VEGA20_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7; adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6575ddcfcf00..5f8c8786cac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -711,6 +711,12 @@ static int vi_asic_reset(struct amdgpu_device *adev) return r; } +static enum amd_reset_method +vi_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 vi_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1023,6 +1029,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .read_bios_from_rom = &vi_read_bios_from_rom, .read_register = &vi_read_register, .reset = &vi_asic_reset, + .reset_method = &vi_asic_reset_method, .set_vga_state = &vi_vga_set_state, .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks,
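For context on the new ->reset_method() callback wired up above: a minimal caller-side sketch, where amdgpu_do_reset_sketch() is a hypothetical helper for illustration rather than anything in this series, could look like this:

	static int amdgpu_do_reset_sketch(struct amdgpu_device *adev)
	{
		/* Hypothetical helper, not part of this patch set: ask the
		 * ASIC which reset it intends to perform so the caller can
		 * prepare for it (BACO, for instance, needs the SMU alive),
		 * then invoke the reset itself. */
		if (adev->asic_funcs->reset_method &&
		    adev->asic_funcs->reset_method(adev) == AMD_RESET_METHOD_BACO)
			dev_info(adev->dev, "resetting via BACO\n");

		return adev->asic_funcs->reset(adev);
	}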