diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
114 files changed, 4323 insertions, 2843 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 00962a659009..ca0e435559d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -53,8 +53,9 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o + amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ + amdgpu_umc.o smu_v11_0_i2c.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -67,7 +68,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ - arct_reg_init.o navi12_reg_init.o + arct_reg_init.o navi12_reg_init.o mxgpu_nv.o # add DF block amdgpu-y += \ @@ -83,7 +84,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_1.o + umc_v6_1.o umc_v6_0.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bd37df5dd6d0..c82579c93265 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -73,6 +73,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" +#include "amdgpu_nbio.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_csa.h" @@ -106,6 +107,8 @@ struct amdgpu_mgpu_info uint32_t num_apu; }; +#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 + /* * Modules parameters. */ @@ -122,6 +125,7 @@ extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; extern int amdgpu_pcie_gen2; extern int amdgpu_msi; +extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH]; extern int amdgpu_dpm; extern int amdgpu_fw_load_type; extern int amdgpu_aspm; @@ -146,11 +150,6 @@ extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; -extern int amdgpu_ngg; -extern int amdgpu_prim_buf_per_se; -extern int amdgpu_pos_buf_per_se; -extern int amdgpu_cntl_sb_buf_per_se; -extern int amdgpu_param_buf_per_se; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; @@ -167,6 +166,12 @@ extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; extern int amdgpu_noretry; +extern int amdgpu_force_asic_type; +#ifdef CONFIG_HSA_AMD +extern int sched_policy; +#else +static const int sched_policy = KFD_SCHED_POLICY_HWS; +#endif #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -425,7 +430,6 @@ struct amdgpu_fpriv { }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); -int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev); int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); @@ -477,7 +481,6 @@ struct amdgpu_cs_parser { uint64_t bytes_moved_vis_threshold; uint64_t bytes_moved; uint64_t bytes_moved_vis; - struct amdgpu_bo_list_entry *evictable; /* user fence */ struct amdgpu_bo_list_entry uf_entry; @@ -644,71 +647,14 @@ typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); - -/* - * amdgpu nbio functions - * - */ -struct nbio_hdp_flush_reg { - u32 ref_and_mask_cp0; - u32 ref_and_mask_cp1; - u32 ref_and_mask_cp2; - u32 ref_and_mask_cp3; - u32 ref_and_mask_cp4; - u32 ref_and_mask_cp5; - u32 ref_and_mask_cp6; - u32 ref_and_mask_cp7; - u32 ref_and_mask_cp8; - u32 ref_and_mask_cp9; - u32 ref_and_mask_sdma0; - u32 ref_and_mask_sdma1; - u32 ref_and_mask_sdma2; - u32 ref_and_mask_sdma3; - u32 ref_and_mask_sdma4; - u32 ref_and_mask_sdma5; - u32 ref_and_mask_sdma6; - u32 ref_and_mask_sdma7; -}; - struct amdgpu_mmio_remap { u32 reg_offset; resource_size_t bus_addr; }; -struct amdgpu_nbio_funcs { - const struct nbio_hdp_flush_reg *hdp_flush_reg; - u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); - u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); - u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); - u32 (*get_rev_id)(struct amdgpu_device *adev); - void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); - void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); - u32 (*get_memsize)(struct amdgpu_device *adev); - void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index, int doorbell_size); - void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, - int doorbell_index, int instance); - void (*enable_doorbell_aperture)(struct amdgpu_device *adev, - bool enable); - void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, - bool enable); - void (*ih_doorbell_range)(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); - void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, - bool enable); - void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, - bool enable); - void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); - void (*ih_control)(struct amdgpu_device *adev); - void (*init_registers)(struct amdgpu_device *adev); - void (*detect_hw_virt)(struct amdgpu_device *adev); - void (*remap_hdp_registers)(struct amdgpu_device *adev); -}; - struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); + void (*sw_fini)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); @@ -921,6 +867,12 @@ struct amdgpu_device { u32 cg_flags; u32 pg_flags; + /* nbio */ + struct amdgpu_nbio nbio; + + /* mmhub */ + struct amdgpu_mmhub mmhub; + /* gfx */ struct amdgpu_gfx gfx; @@ -974,9 +926,7 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; - const struct amdgpu_nbio_funcs *nbio_funcs; const struct amdgpu_df_funcs *df_funcs; - const struct amdgpu_mmhub_funcs *mmhub_funcs; /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 07eb29885372..8c531793fe17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -63,45 +63,10 @@ void amdgpu_amdkfd_fini(void) void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { - const struct kfd2kgd_calls *kfd2kgd; - - switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_KAVERI: - case CHIP_HAWAII: - kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); - break; -#endif - case CHIP_CARRIZO: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); - break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); - break; - case CHIP_ARCTURUS: - kfd2kgd = amdgpu_amdkfd_arcturus_get_functions(); - break; - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions(); - break; - default: - dev_info(adev->dev, "kfd not supported on this ASIC\n"); - return; - } + bool vf = amdgpu_sriov_vf(adev); adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->pdev, adev->asic_type, vf); if (adev->kfd.dev) amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -202,7 +167,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->doorbell_index.last_non_cp; } - kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); + kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources); } } @@ -709,38 +674,14 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) return 0; } -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) -{ - return NULL; -} - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) -{ - return NULL; -} - struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - const struct kfd2kgd_calls *f2g) + unsigned int asic_type, bool vf) { return NULL; } bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index e519df3fd2b6..069d5d230810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -57,7 +57,7 @@ struct kgd_mem { unsigned int mapped_to_gpu_memory; uint64_t va; - uint32_t mapping_flags; + uint32_t alloc_flags; atomic_t invalid; struct amdkfd_process_info *process_info; @@ -137,12 +137,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void); -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void); - bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); @@ -179,10 +173,17 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); +/* Read user wptr from a specified user address space with page fault + * disabled. The memory must be pinned and mapped to the hardware when + * this is called in hqd_load functions, so it should never fault in + * the first place. This resolves a circular lock dependency involving + * four locks, including the DQM lock and mmap_sem. + */ #define read_user_wptr(mmptr, wptr, dst) \ ({ \ bool valid = false; \ if ((mmptr) && (wptr)) { \ + pagefault_disable(); \ if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ } else if (current->mm == NULL) { \ @@ -190,6 +191,7 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s valid = !get_user((dst), (wptr)); \ unuse_mm(mmptr); \ } \ + pagefault_enable(); \ } \ valid; \ }) @@ -240,8 +242,9 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - const struct kfd2kgd_calls *f2g); + unsigned int asic_type, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, + struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index c79aaebeeaf0..e1fbbebce4fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -69,11 +69,11 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t base[8] = { + uint32_t sdma_engine_reg_base[8] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, SOC15_REG_OFFSET(SDMA1, 0, @@ -91,111 +91,82 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, SOC15_REG_OFFSET(SDMA7, 0, mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL }; - uint32_t retval; - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - - mmSDMA0_RLC0_RB_CNTL); + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); return retval; } -static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, - u32 instance, u32 offset) -{ - switch (instance) { - case 0: - return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); - case 1: - return (adev->reg_offset[SDMA1_HWIP][0][1] + offset); - case 2: - return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); - case 3: - return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); - case 4: - return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); - case 5: - return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); - case 6: - return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); - case 7: - return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); - default: - break; - } - return 0; -} - static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev, - m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - data = RREG32(sdmax_gfx_context_cntl); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -205,7 +176,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) @@ -215,15 +187,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -235,14 +207,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -255,40 +227,42 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -static const struct kfd2kgd_calls kfd2kgd = { +const struct kfd2kgd_calls arcturus_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, @@ -304,20 +278,11 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; - -struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index d10f483f5e27..0878f59ec340 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -22,15 +22,9 @@ #undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_ucode.h" -#include "soc15_hw_ip.h" #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" #include "navi10_enum.h" @@ -42,6 +36,7 @@ #include "v10_structs.h" #include "nv.h" #include "nvd.h" +#include "gfxhub_v2_0.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -50,63 +45,6 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -#if 0 -static uint32_t get_watch_base_addr(struct amdgpu_device *adev); -#endif -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. */ @@ -139,37 +77,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .get_tile_config = amdgpu_amdkfd_get_tile_config, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions() -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -250,11 +157,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, ATC_VMID0_PASID_MAPPING__VALID_MASK; pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping); - /* - * need to do this twice, once for gfx and once for mmhub - * for ATC add 16 to VMID for mmhub, for IH different registers. - * ATC_VMID0..15 registers are separate from ATC_VMID16..31. - */ pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, @@ -306,11 +208,11 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t base[2] = { + uint32_t sdma_engine_reg_base[2] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, /* On gfx10, mmSDMA1_xxx registers are defined NOT based @@ -322,12 +224,12 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL }; - uint32_t retval; - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - - mmSDMA0_RLC0_RB_CNTL); + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); return retval; } @@ -488,72 +390,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id); - sdmax_gfx_context_cntl = m->sdma_engine_id ? - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - data = RREG32(sdmax_gfx_context_cntl); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -563,28 +460,26 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id); - pr_debug("sdma base addr %x\n", sdma_base_addr); - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -618,14 +513,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -746,59 +641,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v10_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) - + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) @@ -830,6 +718,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid; + uint16_t queried_pasid; + bool ret; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; if (amdgpu_emu_mode == 0 && ring->sched.ready) @@ -838,13 +728,13 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) - == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, 0); - break; - } + + ret = get_atc_vmid_pasid_mapping_info(kgd, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); + break; } } @@ -914,7 +804,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint64_t base = page_table_base | AMDGPU_PTE_VALID; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", @@ -922,18 +811,31 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, return; } - /* TODO: take advantage of per-process address space size. For - * now, all processes share the same address space size, like - * on GFX8 and older. - */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + /* SDMA is on gfxhub as well for Navi1* series */ + gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base); } + +const struct kfd2kgd_calls gfx_v10_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 5f459bf5f622..6e6f0a99ec06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/fdtable.h> -#include <linux/uaccess.h> #include <linux/mmu_context.h> #include "amdgpu.h" @@ -86,65 +84,6 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, - uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); - -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); - -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); - -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); - -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); -static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. */ @@ -170,37 +109,6 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -303,14 +211,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", + m->sdma_engine_id, m->sdma_queue_id, retval); return retval; } @@ -413,60 +322,52 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; unsigned long end_jiffies; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t data; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - if (m->sdma_engine_id) { - data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); - } else { - data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); - } data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdma_rlc_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdma_rlc_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdma_rlc_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdma_rlc_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -524,13 +425,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -645,32 +546,34 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); return 0; } @@ -758,24 +661,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -855,3 +750,28 @@ static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } + +const struct kfd2kgd_calls gfx_v7_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 6d2f61449606..bfbddedb2380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,9 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> #include <linux/mmu_context.h> #include "amdgpu.h" @@ -44,62 +41,6 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); - /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. */ @@ -125,38 +66,6 @@ static int get_tile_config(struct kgd_dev *kgd, return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -260,13 +169,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) +static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m) { uint32_t retval; retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; - pr_debug("sdma base address: 0x%x\n", retval); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", + m->sdma_engine_id, m->sdma_queue_id, retval); return retval; } @@ -398,59 +309,51 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; unsigned long end_jiffies; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t data; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - if (m->sdma_engine_id) { - data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); - } else { - data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); - } data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); if (read_user_wptr(mm, wptr, data)) - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data); else - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdmax_rlcx_virtual_addr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -517,13 +420,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -641,54 +544,48 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct vi_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(m); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); return 0; } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} + value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int kgd_address_watch_disable(struct kgd_dev *kgd) @@ -798,3 +695,28 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) RREG32(mmVM_INVALIDATE_RESPONSE); return 0; } + +const struct kfd2kgd_calls gfx_v8_kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_info = + get_atc_vmid_pasid_mapping_info, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e262f2ac07a3..c72246f2c08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -22,14 +22,10 @@ #define pr_fmt(fmt) "kfd2kgd: " fmt -#include <linux/module.h> -#include <linux/fdtable.h> -#include <linux/uaccess.h> #include <linux/mmu_context.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "soc15_hw_ip.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "vega10_enum.h" @@ -50,9 +46,6 @@ #include "gmc_v9_0.h" -#define V9_PIPE_PER_MEC (4) -#define V9_QUEUES_PER_PIPE_MEC (8) - enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, @@ -226,22 +219,21 @@ int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t base[2] = { + uint32_t sdma_engine_reg_base[2] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL }; - uint32_t retval; - - retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - - mmSDMA0_RLC0_RB_CNTL); + uint32_t retval = sdma_engine_reg_base[engine_id] + + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); - pr_debug("sdma base address: 0x%x\n", retval); + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); return retval; } @@ -388,71 +380,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; uint32_t data; uint64_t data64; uint64_t __user *wptr64 = (uint64_t __user *)wptr; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdmax_gfx_context_cntl = m->sdma_engine_id ? - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); end_jiffies = msecs_to_jiffies(2000) + jiffies; while (true) { - data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - data = RREG32(sdmax_gfx_context_cntl); - data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, - RESUME_CTX, 0); - WREG32(sdmax_gfx_context_cntl, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, m->sdmax_rlcx_doorbell_offset); data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); if (read_user_wptr(mm, wptr64, data64)) { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, lower_32_bits(data64)); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, upper_32_bits(data64)); } else { - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, m->sdmax_rlcx_rb_rptr); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, m->sdmax_rlcx_rb_rptr_hi); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi); data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); return 0; } @@ -462,7 +450,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, uint32_t (**dump)[2], uint32_t *n_regs) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); uint32_t i = 0, reg; #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) @@ -472,15 +461,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return -ENOMEM; for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) - DUMP_REG(sdma_base_addr + reg); + DUMP_REG(sdma_rlc_reg_offset + reg); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -514,14 +503,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) return true; @@ -584,59 +573,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; - uint32_t sdma_base_addr; + uint32_t sdma_rlc_reg_offset; uint32_t temp; unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); - sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, m->sdma_queue_id); - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); while (true) { - temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (time_after(jiffies, end_jiffies)) + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); return -ETIME; + } usleep_range(500, 1000); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); - m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); m->sdmax_rlcx_rb_rptr_hi = - RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); return 0; } -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid) +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid) { - uint32_t reg; + uint32_t value; struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid) -{ - uint32_t reg; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; - reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) - + vmid); - return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, @@ -671,6 +653,8 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid, i; + uint16_t queried_pasid; + bool ret; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; uint32_t flush_type = 0; @@ -686,14 +670,14 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) - == pasid) { - for (i = 0; i < adev->num_vmhubs; i++) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - i, flush_type); - break; - } + + ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid, + &queried_pasid); + if (ret && queried_pasid == pasid) { + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + i, flush_type); + break; } } @@ -777,15 +761,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, return 0; } -void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid) -{ - /* No longer needed on GFXv9. The scratch base address is - * passed to the shader by the CP. It's the user mode driver's - * responsibility. - */ -} - void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { @@ -811,7 +786,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } -static const struct kfd2kgd_calls kfd2kgd = { +const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, .init_interrupts = kgd_gfx_v9_init_interrupts, @@ -827,19 +802,11 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 26d8879bff9d..d9e9ad22b2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -55,14 +55,10 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset); -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, + uint8_t vmid, uint16_t *p_pasid); void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); -void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d021ecc8d59..a07897e21526 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -33,11 +33,6 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" -/* Special VM and GART address alignment needed for VI pre-Fiji due to - * a HW bug. - */ -#define VI_BO_SIZE_ALIGN (0x8000) - /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) @@ -349,13 +344,46 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_update_directories(adev, vm); + ret = amdgpu_vm_update_pdes(adev, vm, false); if (ret) return ret; return amdgpu_sync_fence(NULL, sync, vm->last_update, false); } +static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) +{ + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT; + uint32_t mapping_flags; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) { + if (bo_adev == adev) + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + else + mapping_flags |= AMDGPU_VM_MTYPE_UC; + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + default: + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + + return amdgpu_gem_va_map_flags(adev, mapping_flags); +} + /* add_bo_to_vm - Add a BO to a VM * * Everything that needs to bo done only once when a BO is first added @@ -404,8 +432,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, } bo_va_entry->va = va; - bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, - mem->mapping_flags); + bo_va_entry->pte_flags = get_pte_flags(adev, mem); bo_va_entry->kgd_dev = (void *)adev; list_add(&bo_va_entry->bo_list, list_bo_va); @@ -1079,10 +1106,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; - int byte_align; u32 domain, alloc_domain; u64 alloc_flags; - uint32_t mapping_flags; int ret; /* @@ -1135,25 +1160,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if ((*mem)->aql_queue) size = size >> 1; - /* Workaround for TLB bug on older VI chips */ - byte_align = (adev->family == AMDGPU_FAMILY_VI && - adev->asic_type != CHIP_FIJI && - adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11 && - adev->asic_type != CHIP_POLARIS12 && - adev->asic_type != CHIP_VEGAM) ? - VI_BO_SIZE_ALIGN : 1; - - mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (flags & ALLOC_MEM_FLAGS_WRITABLE) - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - if (flags & ALLOC_MEM_FLAGS_COHERENT) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - (*mem)->mapping_flags = mapping_flags; + (*mem)->alloc_flags = flags; amdgpu_sync_create(&(*mem)->sync); @@ -1168,7 +1175,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( memset(&bp, 0, sizeof(bp)); bp.size = size; - bp.byte_align = byte_align; + bp.byte_align = 1; bp.domain = alloc_domain; bp.flags = alloc_flags; bp.type = bo_type; @@ -1626,9 +1633,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); - (*mem)->mapping_flags = - AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + (*mem)->alloc_flags = + ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) | + ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE; (*mem)->bo = amdgpu_bo_ref(bo); (*mem)->va = va; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index daf687428cdb..39fd8ae5a822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -120,65 +120,14 @@ union vram_info { struct atom_vram_info_header_v2_3 v23; struct atom_vram_info_header_v2_4 v24; }; -/* - * Return vram width from integrated system info table, if available, - * or 0 if not. - */ -int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) -{ - struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index; - u16 data_offset, size; - union igp_info *igp_info; - union vram_info *vram_info; - u32 mem_channel_number; - u32 mem_channel_width; - u8 frev, crev; - if (adev->flags & AMD_IS_APU) - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - integratedsysteminfo); - else - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - vram_info); - - /* get any igp specific overrides */ - if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, - &frev, &crev, &data_offset)) { - if (adev->flags & AMD_IS_APU) { - igp_info = (union igp_info *) - (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 11: - mem_channel_number = igp_info->v11.umachannelnumber; - /* channel width is 64 */ - return mem_channel_number * 64; - default: - return 0; - } - } else { - vram_info = (union vram_info *) - (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 3: - mem_channel_number = vram_info->v23.vram_module[0].channel_num; - mem_channel_width = vram_info->v23.vram_module[0].channel_width; - return mem_channel_number * (1 << mem_channel_width); - case 4: - mem_channel_number = vram_info->v24.vram_module[0].channel_num; - mem_channel_width = vram_info->v24.vram_module[0].channel_width; - return mem_channel_number * (1 << mem_channel_width); - default: - return 0; - } - } - } - - return 0; -} +union vram_module { + struct atom_vram_module_v9 v9; + struct atom_vram_module_v10 v10; +}; -static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, - int atom_mem_type) +static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, + int atom_mem_type) { int vram_type; @@ -219,19 +168,25 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, return vram_type; } -/* - * Return vram type from either integrated system info table - * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not - */ -int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) + + +int +amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, + int *vram_width, int *vram_type, + int *vram_vendor) { struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index; + int index, i = 0; u16 data_offset, size; union igp_info *igp_info; union vram_info *vram_info; + union vram_module *vram_module; u8 frev, crev; u8 mem_type; + u8 mem_vendor; + u32 mem_channel_number; + u32 mem_channel_width; + u32 module_id; if (adev->flags & AMD_IS_APU) index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, @@ -239,6 +194,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) else index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { @@ -247,25 +203,67 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) (mode_info->atom_context->bios + data_offset); switch (crev) { case 11: + mem_channel_number = igp_info->v11.umachannelnumber; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; mem_type = igp_info->v11.memorytype; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: - return 0; + return -EINVAL; } } else { vram_info = (union vram_info *) (mode_info->atom_context->bios + data_offset); + module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16; switch (crev) { case 3: - mem_type = vram_info->v23.vram_module[0].memory_type; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (module_id > vram_info->v23.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v23.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v9.vram_module_size); + i++; + } + mem_type = vram_module->v9.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v9.channel_num; + mem_channel_width = vram_module->v9.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; case 4: - mem_type = vram_info->v24.vram_module[0].memory_type; - return convert_atom_mem_type_to_vram_type(adev, mem_type); + if (module_id > vram_info->v24.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v24.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v10.vram_module_size); + i++; + } + mem_type = vram_module->v10.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v10.channel_num; + mem_channel_width = vram_module->v10.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; default: - return 0; + return -EINVAL; } } + } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 5ec6f92f353c..53449fc7baf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -29,8 +29,8 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); -int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); -int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, + int *vram_width, int *vram_type, int *vram_vendor); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index d8729285f731..a62cbc8199de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1019,8 +1019,12 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) */ if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) { struct drm_connector *list_connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *list_amdgpu_connector; - list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) { + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(list_connector, + &iter) { if (connector == list_connector) continue; list_amdgpu_connector = to_amdgpu_connector(list_connector); @@ -1037,6 +1041,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) } } } + drm_connector_list_iter_end(&iter); } } } @@ -1494,6 +1499,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct amdgpu_connector_atom_dig *amdgpu_dig_connector; struct drm_encoder *encoder; @@ -1508,10 +1514,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, return; /* see if we already added it */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->connector_id == connector_id) { amdgpu_connector->devices |= supported_device; + drm_connector_list_iter_end(&iter); return; } if (amdgpu_connector->ddc_bus && i2c_bus->valid) { @@ -1526,6 +1534,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, } } } + drm_connector_list_iter_end(&iter); /* check if it's a dp bridge */ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2e53feed40e2..49b767b7238f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -35,6 +35,7 @@ #include "amdgpu_trace.h" #include "amdgpu_gmc.h" #include "amdgpu_gem.h" +#include "amdgpu_ras.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -449,75 +450,12 @@ retry: return r; } -/* Last resort, try to evict something from the current working set */ -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, - struct amdgpu_bo *validated) -{ - uint32_t domain = validated->allowed_domains; - struct ttm_operation_ctx ctx = { true, false }; - int r; - - if (!p->evictable) - return false; - - for (;&p->evictable->tv.head != &p->validated; - p->evictable = list_prev_entry(p->evictable, tv.head)) { - - struct amdgpu_bo_list_entry *candidate = p->evictable; - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - bool update_bytes_moved_vis; - uint32_t other; - - /* If we reached our current BO we can forget it */ - if (bo == validated) - break; - - /* We can't move pinned BOs here */ - if (bo->pin_count) - continue; - - other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - - /* Check if this BO is in one of the domains we need space for */ - if (!(other & domain)) - continue; - - /* Check if we can move this BO somewhere else */ - other = bo->allowed_domains & ~domain; - if (!other) - continue; - - /* Good we can try to move this BO somewhere else */ - update_bytes_moved_vis = - !amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo); - amdgpu_bo_placement_from_domain(bo, other); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - p->bytes_moved += ctx.bytes_moved; - if (update_bytes_moved_vis) - p->bytes_moved_vis += ctx.bytes_moved; - - if (unlikely(r)) - break; - - p->evictable = list_prev_entry(p->evictable, tv.head); - list_move(&candidate->tv.head, &p->validated); - - return true; - } - - return false; -} - static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) { struct amdgpu_cs_parser *p = param; int r; - do { - r = amdgpu_cs_bo_validate(p, bo); - } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo)); + r = amdgpu_cs_bo_validate(p, bo); if (r) return r; @@ -556,9 +494,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, binding_userptr = true; } - if (p->evictable == lobj) - p->evictable = NULL; - r = amdgpu_cs_validate(p, bo); if (r) return r; @@ -661,9 +596,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; p->bytes_moved_vis = 0; - p->evictable = list_last_entry(&p->validated, - struct amdgpu_bo_list_entry, - tv.head); r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, amdgpu_cs_validate, p); @@ -915,7 +847,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_update_directories(adev, vm); + r = amdgpu_vm_update_pdes(adev, vm, false); if (r) return r; @@ -1359,6 +1291,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) bool reserved_buffers = false; int i, r; + if (amdgpu_ras_intr_triggered()) + return -EHWPOISON; + if (!adev->accel_working) return -EBUSY; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 5652cc72ed3a..cb94627fc0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1077,8 +1077,7 @@ failure: ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); - if (fences) - kfree(fences); + kfree(fences); return 0; } @@ -1103,8 +1102,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { - if (adev->debugfs_preempt) - debugfs_remove(adev->debugfs_preempt); + debugfs_remove(adev->debugfs_preempt); } #else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5a1939dbd4e3..f25275abf408 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -65,6 +65,8 @@ #include "amdgpu_ras.h" #include "amdgpu_pmu.h" +#include <linux/suspend.h> + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -78,7 +80,7 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 -static const char *amdgpu_asic_name[] = { +const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", "VERDE", @@ -1023,12 +1025,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_device_check_block_size(adev); - ret = amdgpu_device_get_job_timeout_settings(adev); - if (ret) { - dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - return ret; - } - adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); return ret; @@ -1469,6 +1465,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + goto parse_soc_bounding_box; + adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); @@ -1496,7 +1495,13 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->gfx.config.num_packer_per_sc = le32_to_cpu(gpu_info_fw->num_packer_per_sc); } + +parse_soc_bounding_box: #ifdef CONFIG_DRM_AMD_DC_DCN2_0 + /* + * soc bounding box info is not integrated in disocovery table, + * we always need to parse it from gpu info firmware. + */ if (hdr->version_minor == 2) { const struct gpu_info_firmware_v1_2 *gpu_info_fw = (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + @@ -1613,6 +1618,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + amdgpu_discovery_get_gfx_info(adev); + amdgpu_amdkfd_device_probe(adev); if (amdgpu_sriov_vf(adev)) { @@ -1622,7 +1630,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) } adev->pm.pp_feature = amdgpu_pp_feature_mask; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2231,17 +2239,17 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* handle putting the SMC in the appropriate state */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { if (is_support_sw_smu(adev)) { - /* todo */ + r = smu_set_mp1_state(&adev->smu, adev->mp1_state); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_mp1_state) { r = adev->powerplay.pp_funcs->set_mp1_state( adev->powerplay.pp_handle, adev->mp1_state); - if (r) { - DRM_ERROR("SMC failed to set mp1 state %d, %d\n", - adev->mp1_state, r); - return r; - } + } + if (r) { + DRM_ERROR("SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); + return r; } } @@ -2556,6 +2564,70 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) adev->asic_reset_res, adev->ddev->unique); } +static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) +{ + char *input = amdgpu_lockup_timeout; + char *timeout_setting = NULL; + int index = 0; + long timeout; + int ret = 0; + + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. + * In SR-IOV or passthrough mode, timeout for compute + * jobs are 10000 by default. + */ + adev->gfx_timeout = msecs_to_jiffies(10000); + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) + adev->compute_timeout = adev->gfx_timeout; + else + adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; + + if (timeout == 0) { + index++; + continue; + } else if (timeout < 0) { + timeout = MAX_SCHEDULE_TIMEOUT; + } else { + timeout = msecs_to_jiffies(timeout); + } + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; + } + } + /* + * There is only one value specified and + * it should apply to all non-compute jobs. + */ + if (index == 1) + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + } + + return ret; +} /** * amdgpu_device_init - initialize the driver @@ -2583,7 +2655,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ddev = ddev; adev->pdev = pdev; adev->flags = flags; - adev->asic_type = flags & AMD_ASIC_MASK; + + if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) + adev->asic_type = amdgpu_force_asic_type; + else + adev->asic_type = flags & AMD_ASIC_MASK; + adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; if (amdgpu_emu_mode == 1) adev->usec_timeout *= 2; @@ -2726,6 +2803,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + r = amdgpu_device_get_job_timeout_settings(adev); + if (r) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return r; + } + /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -3007,6 +3090,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) struct amdgpu_device *adev; struct drm_crtc *crtc; struct drm_connector *connector; + struct drm_connector_list_iter iter; int r; if (dev == NULL || dev->dev_private == NULL) { @@ -3029,9 +3113,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) if (!amdgpu_device_has_dc_support(adev)) { /* turn off display hw */ drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) + drm_helper_connector_dpms(connector, + DRM_MODE_DPMS_OFF); + drm_connector_list_iter_end(&iter); drm_modeset_unlock_all(dev); /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -3110,6 +3196,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_device *adev = dev->dev_private; struct drm_crtc *crtc; int r = 0; @@ -3180,9 +3267,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) /* turn on display hw */ drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } + + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) + drm_helper_connector_dpms(connector, + DRM_MODE_DPMS_ON); + drm_connector_list_iter_end(&iter); + drm_modeset_unlock_all(dev); } amdgpu_fbdev_set_suspend(adev, 0); @@ -3628,11 +3719,6 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, break; } } - - list_for_each_entry(tmp_adev, device_list_handle, - gmc.xgmi.head) { - amdgpu_ras_reserve_bad_pages(tmp_adev); - } } } @@ -3736,25 +3822,18 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) adev->mp1_state = PP_MP1_STATE_NONE; break; } - /* Block kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_pre_reset(adev); return true; } static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) { - /*unlock kfd: SRIOV would do it separately */ - if (!amdgpu_sriov_vf(adev)) - amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); adev->mp1_state = PP_MP1_STATE_NONE; adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); } - /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -3774,11 +3853,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; + bool in_ras_intr = amdgpu_ras_intr_triggered(); + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { + + DRM_WARN("Emergency reboot."); + + ksys_sync_helper(); + emergency_restart(); + } need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); - dev_info(adev->dev, "GPU reset begin!\n"); + dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset"); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -3805,9 +3897,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, return 0; } + /* Block kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_pre_reset(adev); + /* Build list of devices to reset */ if (adev->gmc.xgmi.num_physical_nodes > 1) { if (!hive) { + /*unlock kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_post_reset(adev); amdgpu_device_unlock_adev(adev); return -ENODEV; } @@ -3823,17 +3922,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } - /* - * Mark these ASICs to be reseted as untracked first - * And add them back after reset completed - */ - list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) - amdgpu_unregister_gpu_instance(tmp_adev); - /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + if (tmp_adev != adev) { + amdgpu_device_lock_adev(tmp_adev, false); + if (!amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_pre_reset(tmp_adev); + } + + /* + * Mark these ASICs to be reseted as untracked first + * And add them back after reset completed + */ + amdgpu_unregister_gpu_instance(tmp_adev); + /* disable ras on ALL IPs */ - if (amdgpu_device_ip_need_full_reset(tmp_adev)) + if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -3843,10 +3947,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, continue; drm_sched_stop(&ring->sched, job ? &job->base : NULL); + + if (in_ras_intr) + amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } + if (in_ras_intr) + goto skip_sched_resume; + /* * Must check guilty signal here since after this point all old * HW fences are force signaled. @@ -3857,9 +3967,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, dma_fence_is_signaled(job->base.s_fence->parent)) job_signaled = true; - if (!amdgpu_device_ip_need_full_reset(adev)) - device_list_handle = &device_list; - if (job_signaled) { dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -3881,7 +3988,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (tmp_adev == adev) continue; - amdgpu_device_lock_adev(tmp_adev, false); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, &need_full_reset); @@ -3909,6 +4015,7 @@ skip_hw_reset: /* Post ASIC reset for all devs .*/ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -3930,12 +4037,18 @@ skip_hw_reset: if (r) { /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { - dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); } + } +skip_sched_resume: + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + /*unlock kfd: SRIOV would do it separately */ + if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_post_reset(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 1d4aaa9580f4..d2dd59a95e8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -370,11 +370,13 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + struct drm_connector_list_iter iter; uint32_t devices; int i = 0; + drm_connector_list_iter_begin(dev, &iter); DRM_INFO("AMDGPU Display Connectors\n"); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); DRM_INFO("Connector %d:\n", i); DRM_INFO(" %s\n", connector->name); @@ -438,6 +440,7 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) } i++; } + drm_connector_list_iter_end(&iter); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 61f108ec2b5c..4917b548b7f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -321,7 +321,6 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { /** * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation - * @dev: DRM device * @gobj: GEM BO * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4da1d7fb10f4..ddff958aa3ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -43,6 +43,8 @@ #include "amdgpu_amdkfd.h" +#include "amdgpu_ras.h" + /* * KMS wrapper. * - 3.0.0 - initial driver @@ -87,8 +89,6 @@ #define KMS_DRIVER_MINOR 35 #define KMS_DRIVER_PATCHLEVEL 0 -#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 - int amdgpu_vram_limit = 0; int amdgpu_vis_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ @@ -128,11 +128,6 @@ char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; /* OverDrive(bit 14) disabled by default*/ uint amdgpu_pp_feature_mask = 0xffffbfff; -int amdgpu_ngg = 0; -int amdgpu_prim_buf_per_se = 0; -int amdgpu_pos_buf_per_se = 0; -int amdgpu_cntl_sb_buf_per_se = 0; -int amdgpu_param_buf_per_se = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; @@ -146,12 +141,13 @@ int amdgpu_mcbp = 0; int amdgpu_discovery = -1; int amdgpu_mes = 0; int amdgpu_noretry = 1; +int amdgpu_force_asic_type = -1; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), }; int amdgpu_ras_enable = -1; -uint amdgpu_ras_mask = 0xfffffffb; +uint amdgpu_ras_mask = 0xffffffff; /** * DOC: vramlimit (int) @@ -244,10 +240,13 @@ module_param_named(msi, amdgpu_msi, int, 0444); * * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or * multiple values specified. 0 and negative values are invalidated. They will be adjusted - * to default timeout. - * - With one value specified, the setting will apply to all non-compute jobs. - * - With multiple values specified, the first one will be for GFX. The second one is for Compute. - * And the third and fourth ones are for SDMA and Video. + * to the default timeout. + * + * - With one value specified, the setting will apply to all non-compute jobs. + * - With multiple values specified, the first one will be for GFX. + * The second one is for Compute. The third and fourth ones are + * for SDMA and Video. + * * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) * jobs is 10000. And there is no timeout enforced on compute jobs. */ @@ -449,42 +448,6 @@ MODULE_PARM_DESC(virtual_display, module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); /** - * DOC: ngg (int) - * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled). - */ -MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); -module_param_named(ngg, amdgpu_ngg, int, 0444); - -/** - * DOC: prim_buf_per_se (int) - * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); -module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); - -/** - * DOC: pos_buf_per_se (int) - * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); -module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); - -/** - * DOC: cntl_sb_buf_per_se (int) - * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); -module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); - -/** - * DOC: param_buf_per_se (int) - * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte. - * The default is 0 (depending on gfx). - */ -MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); -module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); - -/** * DOC: job_hang_limit (int) * Set how much time allow a job hang and not drop it. The default is 0. */ @@ -616,6 +579,16 @@ MODULE_PARM_DESC(noretry, "Disable retry faults (0 = retry enabled, 1 = retry disabled (default))"); module_param_named(noretry, amdgpu_noretry, int, 0644); +/** + * DOC: force_asic_type (int) + * A non negative value used to specify the asic type for all supported GPUs. + */ +MODULE_PARM_DESC(force_asic_type, + "A non negative value used to specify the asic type for all supported GPUs"); +module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); + + + #ifdef CONFIG_HSA_AMD /** * DOC: sched_policy (int) @@ -1022,6 +995,7 @@ static const struct pci_device_id pciidlist[] = { /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; @@ -1127,7 +1101,10 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - DRM_ERROR("Device removal is currently not supported outside of fbcon\n"); +#ifdef MODULE + if (THIS_MODULE->state != MODULE_STATE_GOING) +#endif + DRM_ERROR("Hotplug removal is not supported\n"); drm_dev_unplug(dev); drm_dev_put(dev); pci_disable_device(pdev); @@ -1140,6 +1117,9 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = dev->dev_private; + if (amdgpu_ras_intr_triggered()) + return; + /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown. * unfortunately we can't detect certain @@ -1347,66 +1327,6 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) return 0; } -int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) -{ - char *input = amdgpu_lockup_timeout; - char *timeout_setting = NULL; - int index = 0; - long timeout; - int ret = 0; - - /* - * By default timeout for non compute jobs is 10000. - * And there is no timeout enforced on compute jobs. - */ - adev->gfx_timeout = msecs_to_jiffies(10000); - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; - - if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { - while ((timeout_setting = strsep(&input, ",")) && - strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { - ret = kstrtol(timeout_setting, 0, &timeout); - if (ret) - return ret; - - if (timeout == 0) { - index++; - continue; - } else if (timeout < 0) { - timeout = MAX_SCHEDULE_TIMEOUT; - } else { - timeout = msecs_to_jiffies(timeout); - } - - switch (index++) { - case 0: - adev->gfx_timeout = timeout; - break; - case 1: - adev->compute_timeout = timeout; - break; - case 2: - adev->sdma_timeout = timeout; - break; - case 3: - adev->video_timeout = timeout; - break; - default: - break; - } - } - /* - * There is only one value specified and - * it should apply to all non-compute jobs. - */ - if (index == 1) - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - } - - return ret; -} - static bool amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index 571a6dfb473e..61fcf247a638 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -37,12 +37,14 @@ amdgpu_link_encoder_connector(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + drm_connector_list_iter_begin(dev, &iter); /* walk the list and link encoders to connectors */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { amdgpu_encoder = to_amdgpu_encoder(encoder); @@ -55,6 +57,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev) } } } + drm_connector_list_iter_end(&iter); } void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) @@ -62,8 +65,10 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector; + struct drm_connector_list_iter iter; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices; @@ -72,6 +77,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) amdgpu_connector->devices, encoder->encoder_type); } } + drm_connector_list_iter_end(&iter); } struct drm_connector * @@ -79,15 +85,20 @@ amdgpu_get_connector_for_encoder(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector; + struct drm_connector *connector, *found = NULL; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->active_device & amdgpu_connector->devices) - return connector; + if (amdgpu_encoder->active_device & amdgpu_connector->devices) { + found = connector; + break; + } } - return NULL; + drm_connector_list_iter_end(&iter); + return found; } struct drm_connector * @@ -95,15 +106,20 @@ amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector; + struct drm_connector *connector, *found = NULL; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_encoder->devices & amdgpu_connector->devices) - return connector; + if (amdgpu_encoder->devices & amdgpu_connector->devices) { + found = connector; + break; + } } - return NULL; + drm_connector_list_iter_end(&iter); + return found; } struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 23085b352cf2..377fe20bce23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -462,18 +462,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, timeout = adev->gfx_timeout; break; case AMDGPU_RING_TYPE_COMPUTE: - /* - * For non-sriov case, no timeout enforce - * on compute ring by default. Unless user - * specifies a timeout for compute ring. - * - * For sriov case, always use the timeout - * as gfx ring - */ - if (!amdgpu_sriov_vf(ring->adev)) - timeout = adev->compute_timeout; - else - timeout = adev->gfx_timeout; + timeout = adev->compute_timeout; break; case AMDGPU_RING_TYPE_SDMA: timeout = adev->sdma_timeout; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 8ceb44925947..6cf8862ebba1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -527,13 +527,41 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; } - r = amdgpu_vm_update_directories(adev, vm); + r = amdgpu_vm_update_pdes(adev, vm, false); error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } +/** + * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags + * + * @adev: amdgpu_device pointer + * @flags: GEM UAPI flags + * + * Returns the GEM UAPI flags mapped into hardware for the ASIC. + */ +uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + if (adev->gmc.gmc_funcs->map_mtype) + pte_flag |= amdgpu_gmc_map_mtype(adev, + flags & AMDGPU_VM_MTYPE_MASK); + + return pte_flag; +} + int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -631,7 +659,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -646,7 +674,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index 0b66d2e6b5d5..e0f025dd1b14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -67,6 +67,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags); int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f9bef3154b99..069515f57c2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" +#include "amdgpu_ras.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -231,12 +232,10 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { - int i, queue, pipe, me; + int i, queue, me; for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { queue = i % adev->gfx.me.num_queue_per_pipe; - pipe = (i / adev->gfx.me.num_queue_per_pipe) - % adev->gfx.me.num_pipe_per_me; me = (i / adev->gfx.me.num_queue_per_pipe) / adev->gfx.me.num_pipe_per_me; @@ -569,3 +568,102 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } + +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_fs_if fs_info = { + .sysfs_name = "gfx_err_count", + .debugfs_name = "gfx_err_inject", + }; + struct ras_ih_if ih_info = { + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + if (!adev->gfx.ras_if) { + adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gfx.ras_if) + return -ENOMEM; + adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if->sub_block_index = 0; + strcpy(adev->gfx.ras_if->name, "gfx"); + } + fs_info.head = ih_info.head = *adev->gfx.ras_if; + + r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) + goto late_fini; + } else { + /* free gfx ras_if if ras is not supported */ + r = 0; + goto free; + } + + return 0; +late_fini: + amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); +free: + kfree(adev->gfx.ras_if); + adev->gfx.ras_if = NULL; + return r; +} + +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && + adev->gfx.ras_if) { + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + /* TODO ue will trigger an interrupt. + * + * When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, err_data); + amdgpu_ras_reset_gpu(adev, 0); + } + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + DRM_ERROR("CP ECC ERROR IRQ\n"); + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 6ee4021910e2..35eff9e6ce16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -201,28 +201,6 @@ struct amdgpu_gfx_funcs { int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); }; -struct amdgpu_ngg_buf { - struct amdgpu_bo *bo; - uint64_t gpu_addr; - uint32_t size; - uint32_t bo_size; -}; - -enum { - NGG_PRIM = 0, - NGG_POS, - NGG_CNTL, - NGG_PARAM, - NGG_BUF_MAX -}; - -struct amdgpu_ngg { - struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; - uint32_t gds_reserve_addr; - uint32_t gds_reserve_size; - bool init; -}; - struct sq_work { struct work_struct work; unsigned ih_data; @@ -311,9 +289,6 @@ struct amdgpu_gfx { uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; - /* NGG */ - struct amdgpu_ngg ngg; - /* gfx off */ bool gfx_off_state; /* true: enabled, false: disabled */ struct mutex gfx_off_mutex; @@ -384,5 +359,12 @@ void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); - +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 5790db61fa2c..a12f33c0f5df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -27,6 +27,8 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include "amdgpu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" /** * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO @@ -305,3 +307,29 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, gmc->fault_hash[hash].idx = gmc->last_fault++; return false; } + +int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { + r = adev->umc.funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) { + r = adev->mmhub.funcs->ras_late_init(adev); + if (r) + return r; + } + + return amdgpu_xgmi_ras_late_init(adev); +} + +void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_umc_ras_fini(adev); + amdgpu_mmhub_ras_fini(adev); + amdgpu_xgmi_ras_fini(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index b6e1d98ef01e..555d8e57fae9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -99,12 +99,15 @@ struct amdgpu_gmc_funcs { unsigned pasid); /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* set pte flags based per asic */ - uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, - uint32_t flags); + /* map mtype to hardware flags */ + uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags); /* get the pde for a given mc addr */ void (*get_vm_pde)(struct amdgpu_device *adev, int level, u64 *dst, u64 *flags); + /* get the pte flags to use for a BO VA mapping */ + void (*get_vm_pte)(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags); }; struct amdgpu_xgmi { @@ -120,6 +123,7 @@ struct amdgpu_xgmi { /* gpu list in the same hive */ struct list_head head; bool supported; + struct ras_common_if *ras_if; }; struct amdgpu_gmc { @@ -153,6 +157,7 @@ struct amdgpu_gmc { uint32_t fw_version; struct amdgpu_irq_src vm_fault; uint32_t vram_type; + uint8_t vram_vendor; uint32_t srbm_soft_reset; bool prt_warning; uint64_t stolen_size; @@ -177,15 +182,14 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; - struct ras_common_if *umc_ras_if; - struct ras_common_if *mmhub_ras_if; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) +#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) -#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) +#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR @@ -230,5 +234,7 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp); +int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); +void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 53734da1c2df..6f9289735e31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_later(updates, (*id)->flushed_updates)) updates = NULL; - if ((*id)->owner != vm->entity.fence_context || + if ((*id)->owner != vm->direct.fence_context || job->vm_pd_addr != (*id)->pd_gpu_addr || updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && @@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if ((*id)->owner != vm->entity.fence_context) + if ((*id)->owner != vm->direct.fence_context) continue; if ((*id)->pd_gpu_addr != job->vm_pd_addr) @@ -449,7 +449,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } id->pd_gpu_addr = job->vm_pd_addr; - id->owner = vm->entity.fence_context; + id->owner = vm->direct.fence_context; if (job->vm_needs_flush) { dma_fence_put(id->last_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 2a3f5ec298db..6f3b03f6224f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -87,10 +87,13 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) struct drm_device *dev = adev->ddev; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + struct drm_connector_list_iter iter; mutex_lock(&mode_config->mutex); - list_for_each_entry(connector, &mode_config->connector_list, head) + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) amdgpu_connector_hotplug(connector); + drm_connector_list_iter_end(&iter); mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); @@ -153,6 +156,20 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) ret = amdgpu_ih_process(adev, &adev->irq.ih); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); + + /* For the hardware that cannot enable bif ring for both ras_controller_irq + * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status + * register to check whether the interrupt is triggered or not, and properly + * ack the interrupt if it is there + */ + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_controller_intr_no_bifring) + adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev); + + if (adev->nbio.funcs && + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring) + adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev); + return ret; } @@ -228,10 +245,19 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.msi_enabled = false; if (amdgpu_msi_ok(adev)) { - int ret = pci_enable_msi(adev->pdev); - if (!ret) { + int nvec = pci_msix_vec_count(adev->pdev); + unsigned int flags; + + if (nvec <= 0) { + flags = PCI_IRQ_MSI; + } else { + flags = PCI_IRQ_MSI | PCI_IRQ_MSIX; + } + /* we only need one vector */ + nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); + if (nvec > 0) { adev->irq.msi_enabled = true; - dev_dbg(adev->dev, "amdgpu: using MSI.\n"); + dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n"); } } @@ -254,7 +280,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); adev->irq.installed = true; - r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); + /* Use vector 0 for MSI-X */ + r = drm_irq_install(adev->ddev, pci_irq_vector(adev->pdev, 0)); if (r) { adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) @@ -369,7 +396,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, * amdgpu_irq_dispatch - dispatch IRQ to IP blocks * * @adev: amdgpu device pointer - * @entry: interrupt vector pointer + * @ih: interrupt ring instance * * Dispatches IRQ to IP blocks. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 9d76e0923a5a..e1bad992e83b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -246,6 +246,44 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) return fence; } +#define to_drm_sched_job(sched_job) \ + container_of((sched_job), struct drm_sched_job, queue_node) + +void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *s_job; + struct drm_sched_entity *s_entity = NULL; + int i; + + /* Signal all jobs not yet scheduled */ + for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = &sched->sched_rq[i]; + + if (!rq) + continue; + + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) { + while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_signal(&s_fence->scheduled); + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); + } + } + spin_unlock(&rq->lock); + } + + /* Signal all jobs already scheduled to HW */ + list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); + } +} + const struct drm_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 51e62504c279..dc7ee9358dcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,4 +76,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f); int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **fence); + +void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d55f5baa83d3..137a8573d556 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -584,9 +584,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_vram_gtt vram_gtt; vram_gtt.vram_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size); - vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size); + atomic64_read(&adev->vram_pin_size) - + AMDGPU_VM_RESERVED_VRAM; + vram_gtt.vram_cpu_accessible_size = + min(adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size), + vram_gtt.vram_size); vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); @@ -599,15 +602,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file memset(&mem, 0, sizeof(mem)); mem.vram.total_heap_size = adev->gmc.real_vram_size; mem.vram.usable_heap_size = adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size); + atomic64_read(&adev->vram_pin_size) - + AMDGPU_VM_RESERVED_VRAM; mem.vram.heap_usage = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = adev->gmc.visible_vram_size; - mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size - - atomic64_read(&adev->visible_pin_size); + mem.cpu_accessible_vram.usable_heap_size = + min(adev->gmc.visible_vram_size - + atomic64_read(&adev->visible_pin_size), + mem.vram.usable_heap_size); mem.cpu_accessible_vram.heap_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = @@ -729,17 +735,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.vce_harvest_config = adev->vce.harvest_config; dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; - - if (amdgpu_ngg) { - dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr; - dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size; - dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr; - dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size; - dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr; - dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size; - dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr; - dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size; - } dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; @@ -968,6 +963,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* Ensure IB tests are run on ring */ flush_delayed_work(&adev->delayed_init_work); + + if (amdgpu_ras_intr_triggered()) { + DRM_ERROR("RAS Intr triggered, device disabled!!"); + return -EHWPOISON; + } + file_priv->driver_priv = NULL; r = pm_runtime_get_sync(dev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c new file mode 100644 index 000000000000..676c48c02d77 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -0,0 +1,70 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "mmhub_err_count", + .debugfs_name = "mmhub_err_inject", + }; + + if (!adev->mmhub.ras_if) { + adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->mmhub.ras_if) + return -ENOMEM; + adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; + adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->mmhub.ras_if->sub_block_index = 0; + strcpy(adev->mmhub.ras_if->name, "mmhub"); + } + ih_info.head = fs_info.head = *adev->mmhub.ras_if; + r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) { + kfree(adev->mmhub.ras_if); + adev->mmhub.ras_if = NULL; + } + + return r; +} + +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && + adev->mmhub.ras_if) { + struct ras_common_if *ras_if = adev->mmhub.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 2d75ecfa199b..1cd78940cf82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -23,9 +23,17 @@ struct amdgpu_mmhub_funcs { void (*ras_init)(struct amdgpu_device *adev); + int (*ras_late_init)(struct amdgpu_device *adev); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); }; +struct amdgpu_mmhub { + struct ras_common_if *ras_if; + const struct amdgpu_mmhub_funcs *funcs; +}; + +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev); +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 31d4deb5d294..392300f77b13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -136,6 +136,7 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) * amdgpu_mn_read_lock - take the read side lock for this notifier * * @amn: our notifier + * @blockable: is the notifier blockable */ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c new file mode 100644 index 000000000000..7d5c3a9de9ea --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "pcie_bif_err_count", + .debugfs_name = "pcie_bif_err_inject", + }; + + if (!adev->nbio.ras_if) { + adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->nbio.ras_if) + return -ENOMEM; + adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; + adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->nbio.ras_if->sub_block_index = 0; + strcpy(adev->nbio.ras_if->name, "pcie_bif"); + } + ih_info.head = fs_info.head = *adev->nbio.ras_if; + r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0); + if (r) + goto late_fini; + r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); + if (r) + goto late_fini; + } else { + r = 0; + goto free; + } + + return 0; +late_fini: + amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info); +free: + kfree(adev->nbio.ras_if); + adev->nbio.ras_if = NULL; + return r; +} + +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) && + adev->nbio.ras_if) { + struct ras_common_if *ras_if = adev->nbio.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h new file mode 100644 index 000000000000..1f26a17e6561 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -0,0 +1,99 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_NBIO_H__ +#define __AMDGPU_NBIO_H__ + +/* + * amdgpu nbio functions + */ +struct nbio_hdp_flush_reg { + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; + u32 ref_and_mask_sdma2; + u32 ref_and_mask_sdma3; + u32 ref_and_mask_sdma4; + u32 ref_and_mask_sdma5; + u32 ref_and_mask_sdma6; + u32 ref_and_mask_sdma7; +}; + +struct amdgpu_nbio_funcs { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); + u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_rev_id)(struct amdgpu_device *adev); + void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); + void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + u32 (*get_memsize)(struct amdgpu_device *adev); + void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, int doorbell_size); + void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance); + void (*enable_doorbell_aperture)(struct amdgpu_device *adev, + bool enable); + void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, + bool enable); + void (*ih_doorbell_range)(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*ih_control)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); + void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); + void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev); + void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev); + int (*init_ras_controller_interrupt)(struct amdgpu_device *adev); + int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + int (*ras_late_init)(struct amdgpu_device *adev); +}; + +struct amdgpu_nbio { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + struct amdgpu_irq_src ras_controller_irq; + struct amdgpu_irq_src ras_err_event_athub_irq; + struct ras_common_if *ras_if; + const struct amdgpu_nbio_funcs *funcs; +}; + +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev); +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1fead0e8b890..f10b6175e20c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -343,6 +343,70 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, } /** + * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location + * + * @adev: amdgpu device object + * @offset: offset of the BO + * @size: size of the BO + * @domain: where to place it + * @bo_ptr: used to initialize BOs in structures + * @cpu_addr: optional CPU address mapping + * + * Creates a kernel BO at a specific offset in the address space of the domain. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, uint32_t domain, + struct amdgpu_bo **bo_ptr, void **cpu_addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + unsigned int i; + int r; + + offset &= PAGE_MASK; + size = ALIGN(size, PAGE_SIZE); + + r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr, + NULL, cpu_addr); + if (r) + return r; + + /* + * Remove the original mem node and create a new one at the request + * position. + */ + if (cpu_addr) + amdgpu_bo_kunmap(*bo_ptr); + + ttm_bo_mem_put(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem); + + for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) { + (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT; + (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement, + &(*bo_ptr)->tbo.mem, &ctx); + if (r) + goto error; + + if (cpu_addr) { + r = amdgpu_bo_kmap(*bo_ptr, cpu_addr); + if (r) + goto error; + } + + amdgpu_bo_unreserve(*bo_ptr); + return 0; + +error: + amdgpu_bo_unreserve(*bo_ptr); + amdgpu_bo_unref(bo_ptr); + return r; +} + +/** * amdgpu_bo_free_kernel - free BO for kernel use * * @bo: amdgpu BO to free @@ -451,7 +515,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, { struct ttm_operation_ctx ctx = { .interruptible = (bp->type != ttm_bo_type_kernel), - .no_wait_gpu = false, + .no_wait_gpu = bp->no_wait_gpu, .resv = bp->resv, .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 658f4c9779b7..7e99f6c58c48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -41,6 +41,7 @@ struct amdgpu_bo_param { u32 preferred_domain; u64 flags; enum ttm_bo_type type; + bool no_wait_gpu; struct dma_resv *resv; }; @@ -237,6 +238,9 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, uint32_t domain, + struct amdgpu_bo **bo_ptr, void **cpu_addr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 03930313c263..d63866164496 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -805,8 +805,7 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, } /** - * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk - * pp_dpm_pcie + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie * * The amdgpu driver provides a sysfs API for adjusting what power levels * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, @@ -822,9 +821,15 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev, * * To manually adjust these states, first select manual using * power_dpm_force_performance_level. - * Secondly,Enter a new value for each level by inputing a string that + * Secondly, enter a new value for each level by inputing a string that * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie" - * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6. + * E.g., + * + * .. code-block:: bash + * + * echo "4 5 6" > pp_dpm_sclk + * + * will enable sclk levels 4, 5, and 6. * * NOTE: change to the dcefclk max dpm level is not supported now */ @@ -2196,9 +2201,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * - fan1_input: fan speed in RPM * - * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM) + * - fan[1-\*]_target: Desired fan speed Unit: revolution/min (RPM) * - * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable + * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable * * hwmon interfaces for GPU clocks: * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 4d71537a960d..37ffed5e2171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -151,10 +151,12 @@ psp_cmd_submit_buf(struct psp_context *psp, return ret; } + amdgpu_asic_invalidate_hdp(psp->adev, NULL); while (*((unsigned int *)psp->fence_buf) != index) { if (--timeout == 0) break; msleep(1); + amdgpu_asic_invalidate_hdp(psp->adev, NULL); } /* In some cases, psp response status is not 0 even there is no @@ -168,8 +170,9 @@ psp_cmd_submit_buf(struct psp_context *psp, if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_WARN("psp command failed and response status is (0x%X)\n", - psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); + DRM_DEBUG_DRIVER("psp command (0x%X) failed and response status is (0x%X)\n", + psp->cmd_buf_mem->cmd_id, + psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); if (!timeout) { mutex_unlock(&psp->mutex); return -EINVAL; @@ -253,7 +256,8 @@ static int psp_tmr_init(struct psp_context *psp) /* For ASICs support RLC autoload, psp will parse the toc * and calculate the total size of TMR needed */ - if (psp->toc_start_addr && + if (!amdgpu_sriov_vf(psp->adev) && + psp->toc_start_addr && psp->toc_bin_size && psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); @@ -287,15 +291,9 @@ static int psp_tmr_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - if (ret) - goto failed; kfree(cmd); - return 0; - -failed: - kfree(cmd); return ret; } @@ -772,6 +770,324 @@ static int psp_ras_initialize(struct psp_context *psp) } // ras end +// HDCP start +static void psp_prep_hdcp_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t hdcp_ta_mc, + uint64_t hdcp_mc_shared, + uint32_t hdcp_ta_size, + uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(hdcp_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(hdcp_ta_mc); + cmd->cmd.cmd_load_ta.app_len = hdcp_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = + lower_32_bits(hdcp_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = + upper_32_bits(hdcp_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_hdcp_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for hdcp ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->hdcp_context.hdcp_shared_bo, + &psp->hdcp_context.hdcp_shared_mc_addr, + &psp->hdcp_context.hdcp_shared_buf); + + return ret; +} + +static int psp_hdcp_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr, + psp->ta_hdcp_ucode_size); + + psp_prep_hdcp_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->hdcp_context.hdcp_shared_mc_addr, + psp->ta_hdcp_ucode_size, + PSP_HDCP_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + if (!ret) { + psp->hdcp_context.hdcp_initialized = 1; + psp->hdcp_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} +static int psp_hdcp_initialize(struct psp_context *psp) +{ + int ret; + + if (!psp->hdcp_context.hdcp_initialized) { + ret = psp_hdcp_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_hdcp_load(psp); + if (ret) + return ret; + + return 0; +} +static void psp_prep_hdcp_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t hdcp_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = hdcp_session_id; +} + +static int psp_hdcp_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_hdcp_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static void psp_prep_hdcp_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t hdcp_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = hdcp_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_hdcp_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->hdcp_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_hdcp_terminate(struct psp_context *psp) +{ + int ret; + + if (!psp->hdcp_context.hdcp_initialized) + return 0; + + ret = psp_hdcp_unload(psp); + if (ret) + return ret; + + psp->hdcp_context.hdcp_initialized = 0; + + /* free hdcp shared memory */ + amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, + &psp->hdcp_context.hdcp_shared_mc_addr, + &psp->hdcp_context.hdcp_shared_buf); + + return 0; +} +// HDCP end + +// DTM start +static void psp_prep_dtm_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t dtm_ta_mc, + uint64_t dtm_mc_shared, + uint32_t dtm_ta_size, + uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(dtm_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(dtm_ta_mc); + cmd->cmd.cmd_load_ta.app_len = dtm_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(dtm_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(dtm_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_dtm_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for dtm ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->dtm_context.dtm_shared_bo, + &psp->dtm_context.dtm_shared_mc_addr, + &psp->dtm_context.dtm_shared_buf); + + return ret; +} + +static int psp_dtm_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); + + psp_prep_dtm_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->dtm_context.dtm_shared_mc_addr, + psp->ta_dtm_ucode_size, + PSP_DTM_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + if (!ret) { + psp->dtm_context.dtm_initialized = 1; + psp->dtm_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static int psp_dtm_initialize(struct psp_context *psp) +{ + int ret; + + if (!psp->dtm_context.dtm_initialized) { + ret = psp_dtm_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_dtm_load(psp); + if (ret) + return ret; + + return 0; +} + +static void psp_prep_dtm_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t dtm_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = dtm_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_dtm_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->dtm_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_dtm_terminate(struct psp_context *psp) +{ + int ret; + + if (!psp->dtm_context.dtm_initialized) + return 0; + + ret = psp_hdcp_unload(psp); + if (ret) + return ret; + + psp->dtm_context.dtm_initialized = 0; + + /* free hdcp shared memory */ + amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, + &psp->dtm_context.dtm_shared_mc_addr, + &psp->dtm_context.dtm_shared_buf); + + return 0; +} +// DTM end + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -845,6 +1161,16 @@ static int psp_hw_start(struct psp_context *psp) if (ret) dev_err(psp->adev->dev, "RAS: Failed to initialize RAS\n"); + + ret = psp_hdcp_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "HDCP: Failed to initialize HDCP\n"); + + ret = psp_dtm_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "DTM: Failed to initialize DTM\n"); } return 0; @@ -950,21 +1276,7 @@ static void psp_print_fw_hdr(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { struct amdgpu_device *adev = psp->adev; - const struct sdma_firmware_header_v1_0 *sdma_hdr = - (const struct sdma_firmware_header_v1_0 *) - adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; - const struct gfx_firmware_header_v1_0 *ce_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - const struct gfx_firmware_header_v1_0 *pfp_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - const struct gfx_firmware_header_v1_0 *me_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - const struct gfx_firmware_header_v1_0 *mec_hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - const struct rlc_firmware_header_v2_0 *rlc_hdr = - (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; - const struct smc_firmware_header_v1_0 *smc_hdr = - (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; + struct common_firmware_header *hdr; switch (ucode->ucode_id) { case AMDGPU_UCODE_ID_SDMA0: @@ -975,25 +1287,33 @@ static void psp_print_fw_hdr(struct psp_context *psp, case AMDGPU_UCODE_ID_SDMA5: case AMDGPU_UCODE_ID_SDMA6: case AMDGPU_UCODE_ID_SDMA7: - amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header); + hdr = (struct common_firmware_header *) + adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; + amdgpu_ucode_print_sdma_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_CE: - amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_PFP: - amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_ME: - amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.me_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_CP_MEC1: - amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(hdr); break; case AMDGPU_UCODE_ID_RLC_G: - amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header); + hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(hdr); break; case AMDGPU_UCODE_ID_SMC: - amdgpu_ucode_print_smc_hdr(&smc_hdr->header); + hdr = (struct common_firmware_header *)adev->pm.fw->data; + amdgpu_ucode_print_smc_hdr(hdr); break; default: break; @@ -1079,10 +1399,6 @@ out: ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) /* skip mec JT when autoload is enabled */ continue; - /* Renoir only needs to load mec jump table one time */ - if (adev->asic_type == CHIP_RENOIR && - ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) - continue; psp_print_fw_hdr(psp, ucode); @@ -1091,7 +1407,8 @@ out: return ret; /* Start rlc autoload after psp recieved all the gfx firmware */ - if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + if (psp->autoload_supported && ucode->ucode_id == + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { ret = psp_rlc_autoload(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); @@ -1216,8 +1533,11 @@ static int psp_hw_fini(void *handle) psp->xgmi_context.initialized == 1) psp_xgmi_terminate(psp); - if (psp->adev->psp.ta_fw) + if (psp->adev->psp.ta_fw) { psp_ras_terminate(psp); + psp_dtm_terminate(psp); + psp_hdcp_terminate(psp); + } psp_ring_destroy(psp, PSP_RING_TYPE__KM); @@ -1259,6 +1579,16 @@ static int psp_suspend(void *handle) DRM_ERROR("Failed to terminate ras ta\n"); return ret; } + ret = psp_hdcp_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate hdcp ta\n"); + return ret; + } + ret = psp_dtm_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate dtm ta\n"); + return ret; + } } ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); @@ -1317,9 +1647,6 @@ int psp_rlc_autoload_start(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; - if (amdgpu_sriov_vf(psp->adev)) - return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index bc0947f6bc8a..7dd9ae7dbbe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -37,6 +37,9 @@ #define PSP_RAS_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE 0x400000 +#define PSP_HDCP_SHARED_MEM_SIZE 0x4000 +#define PSP_DTM_SHARED_MEM_SIZE 0x4000 +#define PSP_SHARED_MEM_SIZE 0x4000 struct psp_context; struct psp_xgmi_node_info; @@ -142,6 +145,22 @@ struct psp_ras_context { struct amdgpu_ras *ras; }; +struct psp_hdcp_context { + bool hdcp_initialized; + uint32_t session_id; + struct amdgpu_bo *hdcp_shared_bo; + uint64_t hdcp_shared_mc_addr; + void *hdcp_shared_buf; +}; + +struct psp_dtm_context { + bool dtm_initialized; + uint32_t session_id; + struct amdgpu_bo *dtm_shared_bo; + uint64_t dtm_shared_mc_addr; + void *dtm_shared_buf; +}; + struct psp_context { struct amdgpu_device *adev; @@ -206,8 +225,19 @@ struct psp_context uint32_t ta_ras_ucode_version; uint32_t ta_ras_ucode_size; uint8_t *ta_ras_start_addr; + + uint32_t ta_hdcp_ucode_version; + uint32_t ta_hdcp_ucode_size; + uint8_t *ta_hdcp_start_addr; + + uint32_t ta_dtm_ucode_version; + uint32_t ta_dtm_ucode_size; + uint8_t *ta_dtm_start_addr; + struct psp_xgmi_context xgmi_context; struct psp_ras_context ras; + struct psp_hdcp_context hdcp_context; + struct psp_dtm_context dtm_context; struct mutex mutex; }; @@ -279,6 +309,8 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); +int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_rlc_autoload_start(struct psp_context *psp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 016ea274b955..0e2ee5869b5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -25,10 +25,13 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/uaccess.h> +#include <linux/reboot.h> +#include <linux/syscalls.h> #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" const char *ras_error_string[] = { "none", @@ -65,11 +68,8 @@ const char *ras_block_string[] = { /* inject address is 52 bits */ #define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) -static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, - struct amdgpu_bo **bo_ptr); -static int amdgpu_ras_release_vram(struct amdgpu_device *adev, - struct amdgpu_bo **bo_ptr); + +atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) @@ -150,6 +150,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, op = 1; else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) op = 2; + else if (sscanf(str, "reboot %32s", block_name) == 1) + op = 3; else if (str[0] && str[1] && str[2] && str[3]) /* ascii string, but commands are not matched. */ return -EINVAL; @@ -189,6 +191,10 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } + +static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head); + /** * DOC: AMDGPU RAS debugfs control interface * @@ -210,29 +216,36 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * * Second member: struct ras_debug_if::op. * It has three kinds of operations. - * 0: disable RAS on the block. Take ::head as its data. - * 1: enable RAS on the block. Take ::head as its data. - * 2: inject errors on the block. Take ::inject as its data. + * + * - 0: disable RAS on the block. Take ::head as its data. + * - 1: enable RAS on the block. Take ::head as its data. + * - 2: inject errors on the block. Take ::inject as its data. * * How to use the interface? * programs: * copy the struct ras_debug_if in your codes and initialize it. * write the struct to the control node. * - * bash: - * echo op block [error [sub_blcok address value]] > .../ras/ras_ctrl - * op: disable, enable, inject - * disable: only block is needed - * enable: block and error are needed - * inject: error, address, value are needed - * block: umc, smda, gfx, ......... - * see ras_block_string[] for details - * error: ue, ce - * ue: multi_uncorrectable - * ce: single_correctable - * sub_block: sub block index, pass 0 if there is no sub block + * .. code-block:: bash + * + * echo op block [error [sub_blcok address value]] > .../ras/ras_ctrl + * + * op: disable, enable, inject + * disable: only block is needed + * enable: block and error are needed + * inject: error, address, value are needed + * block: umc, smda, gfx, ......... + * see ras_block_string[] for details + * error: ue, ce + * ue: multi_uncorrectable + * ce: single_correctable + * sub_block: + * sub block index, pass 0 if there is no sub block + * + * here are some examples for bash commands: + * + * .. code-block:: bash * - * here are some examples for bash commands, * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl @@ -245,8 +258,9 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * For inject, please check corresponding err count at * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * - * NOTE: operation is only allowed on blocks which are supported. - * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * .. note:: + * Operation is only allowed on blocks which are supported. + * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask */ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) @@ -279,6 +293,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * /* data.inject.address is offset instead of absolute gpu address */ ret = amdgpu_ras_error_inject(adev, &data.inject); break; + case 3: + amdgpu_ras_get_context(adev)->reboot = true; + break; default: ret = -EINVAL; break; @@ -290,6 +307,33 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * return size; } +/** + * DOC: AMDGPU RAS debugfs EEPROM table reset interface + * + * Some boards contain an EEPROM which is used to persistently store a list of + * bad pages containing ECC errors detected in vram. This interface provides + * a way to reset the EEPROM, e.g., after testing error injection. + * + * Usage: + * + * .. code-block:: bash + * + * echo 1 > ../ras/ras_eeprom_reset + * + * will reset EEPROM table to 0 entries. + * + */ +static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + int ret; + + ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control); + + return ret == 1 ? size : -EIO; +} + static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { .owner = THIS_MODULE, .read = NULL, @@ -297,6 +341,34 @@ static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = { + .owner = THIS_MODULE, + .read = NULL, + .write = amdgpu_ras_debugfs_eeprom_write, + .llseek = default_llseek +}; + +/** + * DOC: AMDGPU RAS sysfs Error Count Interface + * + * It allows user to read the error count for each IP block on the gpu through + * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count + * + * It outputs the multiple lines which report the uncorrected (ue) and corrected + * (ce) error counts. + * + * The format of one line is below, + * + * [ce|ue]: count + * + * Example: + * + * .. code-block:: bash + * + * ue: 0 + * ce: 1 + * + */ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -615,8 +687,12 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, adev->gfx.funcs->query_ras_error_count(adev, &err_data); break; case AMDGPU_RAS_BLOCK__MMHUB: - if (adev->mmhub_funcs->query_ras_error_count) - adev->mmhub_funcs->query_ras_error_count(adev, &err_data); + if (adev->mmhub.funcs->query_ras_error_count) + adev->mmhub.funcs->query_ras_error_count(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__PCIE_BIF: + if (adev->nbio.funcs->query_ras_error_count) + adev->nbio.funcs->query_ras_error_count(adev, &err_data); break; default: break; @@ -628,12 +704,14 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; - if (err_data.ce_count) + if (err_data.ce_count) { dev_info(adev->dev, "%ld correctable errors detected in %s block\n", obj->err_data.ce_count, ras_block_str(info->head.block)); - if (err_data.ue_count) + } + if (err_data.ue_count) { dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", obj->err_data.ue_count, ras_block_str(info->head.block)); + } return 0; } @@ -664,6 +742,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, break; case AMDGPU_RAS_BLOCK__UMC: case AMDGPU_RAS_BLOCK__MMHUB: + case AMDGPU_RAS_BLOCK__XGMI_WAFL: + case AMDGPU_RAS_BLOCK__PCIE_BIF: ret = psp_ras_trigger_error(&adev->psp, &block_info); break; default: @@ -733,8 +813,8 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) }; } -/* - * DOC: ras sysfs gpu_vram_bad_pages interface +/** + * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface * * It allows user to read the bad pages of vram on the gpu through * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages @@ -746,14 +826,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) * * gpu pfn and gpu page size are printed in hex format. * flags can be one of below character, + * * R: reserved, this gpu page is reserved and not able to use. + * * P: pending for reserve, this gpu page is marked as bad, will be reserved - * in next window of page_reserve. + * in next window of page_reserve. + * * F: unable to reserve. this gpu page can't be reserved due to some reasons. * - * examples: - * 0x00000001 : 0x00001000 : R - * 0x00000002 : 0x00001000 : P + * Examples: + * + * .. code-block:: bash + * + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P + * */ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, @@ -934,8 +1021,10 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) struct drm_minor *minor = adev->ddev->primary; con->dir = debugfs_create_dir("ras", minor->debugfs_root); - con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, - adev, &amdgpu_ras_debugfs_ctrl_ops); + debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_ctrl_ops); + debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_eeprom_ops); } void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, @@ -980,10 +1069,8 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) amdgpu_ras_debugfs_remove(adev, &obj->head); } - debugfs_remove(con->ent); - debugfs_remove(con->dir); + debugfs_remove_recursive(con->dir); con->dir = NULL; - con->ent = NULL; } /* debugfs end */ @@ -1188,14 +1275,14 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, for (; i < data->count; i++) { (*bps)[i] = (struct ras_badpage){ - .bp = data->bps[i].bp, + .bp = data->bps[i].retired_page, .size = AMDGPU_GPU_PAGE_SIZE, .flags = 0, }; if (data->last_reserved <= i) (*bps)[i].flags = 1; - else if (data->bps[i].bo == NULL) + else if (data->bps_bo[i] == NULL) (*bps)[i].flags = 2; } @@ -1214,105 +1301,46 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) atomic_set(&ras->in_recovery, 0); } -static int amdgpu_ras_release_vram(struct amdgpu_device *adev, - struct amdgpu_bo **bo_ptr) -{ - /* no need to free it actually. */ - amdgpu_bo_free_kernel(bo_ptr, NULL, NULL); - return 0; -} - -/* reserve vram with size@offset */ -static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, - uint64_t offset, uint64_t size, - struct amdgpu_bo **bo_ptr) -{ - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_param bp; - int r = 0; - int i; - struct amdgpu_bo *bo; - - if (bo_ptr) - *bo_ptr = NULL; - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - - r = amdgpu_bo_create(adev, &bp, &bo); - if (r) - return -EINVAL; - - r = amdgpu_bo_reserve(bo, false); - if (r) - goto error_reserve; - - offset = ALIGN(offset, PAGE_SIZE); - for (i = 0; i < bo->placement.num_placement; ++i) { - bo->placements[i].fpfn = offset >> PAGE_SHIFT; - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } - - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, &bo->tbo.mem, &ctx); - if (r) - goto error_pin; - - r = amdgpu_bo_pin_restricted(bo, - AMDGPU_GEM_DOMAIN_VRAM, - offset, - offset + size); - if (r) - goto error_pin; - - if (bo_ptr) - *bo_ptr = bo; - - amdgpu_bo_unreserve(bo); - return r; - -error_pin: - amdgpu_bo_unreserve(bo); -error_reserve: - amdgpu_bo_unref(&bo); - return r; -} - /* alloc/realloc bps array */ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, struct ras_err_handler_data *data, int pages) { unsigned int old_space = data->count + data->space_left; unsigned int new_space = old_space + pages; - unsigned int align_space = ALIGN(new_space, 1024); - void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); - - if (!tmp) + unsigned int align_space = ALIGN(new_space, 512); + void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); + struct amdgpu_bo **bps_bo = + kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL); + + if (!bps || !bps_bo) { + kfree(bps); + kfree(bps_bo); return -ENOMEM; + } if (data->bps) { - memcpy(tmp, data->bps, + memcpy(bps, data->bps, data->count * sizeof(*data->bps)); kfree(data->bps); } + if (data->bps_bo) { + memcpy(bps_bo, data->bps_bo, + data->count * sizeof(*data->bps_bo)); + kfree(data->bps_bo); + } - data->bps = tmp; + data->bps = bps; + data->bps_bo = bps_bo; data->space_left += align_space - old_space; return 0; } /* it deal with vram only. */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - unsigned long *bps, int pages) + struct eeprom_table_record *bps, int pages) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; - int i = pages; int ret = 0; if (!con || !con->eh_data || !bps || pages <= 0) @@ -1329,24 +1357,87 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, goto out; } - while (i--) - data->bps[data->count++].bp = bps[i]; - + memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); + data->count += pages; data->space_left -= pages; + out: mutex_unlock(&con->recovery_lock); return ret; } +/* + * write error record array to eeprom, the function should be + * protected by recovery_lock + */ +static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + struct amdgpu_ras_eeprom_control *control; + int save_count; + + if (!con || !con->eh_data) + return 0; + + control = &con->eeprom_control; + data = con->eh_data; + save_count = data->count - control->num_recs; + /* only new entries are saved */ + if (save_count > 0) + if (amdgpu_ras_eeprom_process_recods(control, + &data->bps[control->num_recs], + true, + save_count)) { + DRM_ERROR("Failed to save EEPROM table data!"); + return -EIO; + } + + return 0; +} + +/* + * read error record array in eeprom and reserve enough space for + * storing new bad pages + */ +static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras_eeprom_control *control = + &adev->psp.ras.ras->eeprom_control; + struct eeprom_table_record *bps = NULL; + int ret = 0; + + /* no bad page record, skip eeprom access */ + if (!control->num_recs) + return ret; + + bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL); + if (!bps) + return -ENOMEM; + + if (amdgpu_ras_eeprom_process_recods(control, bps, false, + control->num_recs)) { + DRM_ERROR("Failed to load EEPROM table records!"); + ret = -EIO; + goto out; + } + + ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs); + +out: + kfree(bps); + return ret; +} + /* called in gpu recovery/init */ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; uint64_t bp; - struct amdgpu_bo *bo; - int i; + struct amdgpu_bo *bo = NULL; + int i, ret = 0; if (!con || !con->eh_data) return 0; @@ -1357,18 +1448,29 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) goto out; /* reserve vram at driver post stage. */ for (i = data->last_reserved; i < data->count; i++) { - bp = data->bps[i].bp; + bp = data->bps[i].retired_page; - if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT, - PAGE_SIZE, &bo)) - DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp); + /* There are two cases of reserve error should be ignored: + * 1) a ras bad page has been allocated (used by someone); + * 2) a ras bad page has been reserved (duplicate error injection + * for one page); + */ + if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL)) + DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp); - data->bps[i].bo = bo; + data->bps_bo[i] = bo; data->last_reserved = i + 1; + bo = NULL; } + + /* continue to save bad pages to eeprom even reesrve_vram fails */ + ret = amdgpu_ras_save_bad_pages(adev); out: mutex_unlock(&con->recovery_lock); - return 0; + return ret; } /* called when driver unload */ @@ -1388,11 +1490,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) goto out; for (i = data->last_reserved - 1; i >= 0; i--) { - bo = data->bps[i].bo; + bo = data->bps_bo[i]; - amdgpu_ras_release_vram(adev, &bo); + amdgpu_bo_free_kernel(&bo, NULL, NULL); - data->bps[i].bo = bo; + data->bps_bo[i] = bo; data->last_reserved = i; } out: @@ -1400,41 +1502,54 @@ out: return 0; } -static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) -{ - /* TODO - * write the array to eeprom when SMU disabled. - */ - return 0; -} - -static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) -{ - /* TODO - * read the array to eeprom when SMU disabled. - */ - return 0; -} - -static int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data **data = &con->eh_data; + struct ras_err_handler_data **data; + int ret; - *data = kmalloc(sizeof(**data), - GFP_KERNEL|__GFP_ZERO); - if (!*data) - return -ENOMEM; + if (con) + data = &con->eh_data; + else + return 0; + + *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); + if (!*data) { + ret = -ENOMEM; + goto out; + } mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); con->adev = adev; - amdgpu_ras_load_bad_pages(adev); - amdgpu_ras_reserve_bad_pages(adev); + ret = amdgpu_ras_eeprom_init(&con->eeprom_control); + if (ret) + goto free; + + if (con->eeprom_control.num_recs) { + ret = amdgpu_ras_load_bad_pages(adev); + if (ret) + goto free; + ret = amdgpu_ras_reserve_bad_pages(adev); + if (ret) + goto release; + } return 0; + +release: + amdgpu_ras_release_bad_pages(adev); +free: + kfree((*data)->bps); + kfree((*data)->bps_bo); + kfree(*data); + con->eh_data = NULL; +out: + DRM_WARN("Failed to initialize ras recovery!\n"); + + return ret; } static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) @@ -1442,13 +1557,17 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data = con->eh_data; + /* recovery_init failed to init it, fini is useless */ + if (!data) + return 0; + cancel_work_sync(&con->recovery_work); - amdgpu_ras_save_bad_pages(adev); amdgpu_ras_release_bad_pages(adev); mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); + kfree(data->bps_bo); kfree(data); mutex_unlock(&con->recovery_lock); @@ -1500,6 +1619,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int r; if (con) return 0; @@ -1527,31 +1647,106 @@ int amdgpu_ras_init(struct amdgpu_device *adev) /* Might need get this flag from vbios. */ con->flags = RAS_DEFAULT_FLAGS; - if (amdgpu_ras_recovery_init(adev)) - goto recovery_out; + if (adev->nbio.funcs->init_ras_controller_interrupt) { + r = adev->nbio.funcs->init_ras_controller_interrupt(adev); + if (r) + return r; + } + + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { + r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); + if (r) + return r; + } amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK; if (amdgpu_ras_fs_init(adev)) goto fs_out; - /* ras init for each ras block */ - if (adev->umc.funcs->ras_init) - adev->umc.funcs->ras_init(adev); - DRM_INFO("RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", con->hw_supported, con->supported); return 0; fs_out: - amdgpu_ras_recovery_fini(adev); -recovery_out: amdgpu_ras_set_context(adev, NULL); kfree(con); return -EINVAL; } +/* helper function to handle common stuff in ip late init phase */ +int amdgpu_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_fs_if *fs_info, + struct ras_ih_if *ih_info) +{ + int r; + + /* disable RAS feature per IP block if it is not supported */ + if (!amdgpu_ras_is_supported(adev, ras_block->block)) { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); + return 0; + } + + r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1); + if (r) { + if (r == -EAGAIN) { + /* request gpu reset. will run again */ + amdgpu_ras_request_reset_on_boot(adev, + ras_block->block); + return 0; + } else if (adev->in_suspend || adev->in_gpu_reset) { + /* in resume phase, if fail to enable ras, + * clean up all ras fs nodes, and disable ras */ + goto cleanup; + } else + return r; + } + + /* in resume phase, no need to create ras fs node */ + if (adev->in_suspend || adev->in_gpu_reset) + return 0; + + if (ih_info->cb) { + r = amdgpu_ras_interrupt_add_handler(adev, ih_info); + if (r) + goto interrupt; + } + + amdgpu_ras_debugfs_create(adev, fs_info); + + r = amdgpu_ras_sysfs_create(adev, fs_info); + if (r) + goto sysfs; + + return 0; +cleanup: + amdgpu_ras_sysfs_remove(adev, ras_block); +sysfs: + amdgpu_ras_debugfs_remove(adev, ras_block); + if (ih_info->cb) + amdgpu_ras_interrupt_remove_handler(adev, ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, ras_block, 0); + return r; +} + +/* helper function to remove ras fs node and interrupt handler */ +void amdgpu_ras_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_ih_if *ih_info) +{ + if (!ras_block || !ih_info) + return; + + amdgpu_ras_sysfs_remove(adev, ras_block); + amdgpu_ras_debugfs_remove(adev, ras_block); + if (ih_info->cb) + amdgpu_ras_interrupt_remove_handler(adev, ih_info); + amdgpu_ras_feature_enable(adev, ras_block, 0); +} + /* do some init work after IP late init as dependence. * and it runs in resume/gpu reset/booting up cases. */ @@ -1645,3 +1840,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } + +void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) +{ + if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { + DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); + + amdgpu_ras_reset_gpu(adev, false); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6c76bb2a6843..f80fd3428c98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -317,8 +317,6 @@ struct amdgpu_ras { struct list_head head; /* debugfs */ struct dentry *dir; - /* debugfs ctrl */ - struct dentry *ent; /* sysfs */ struct device_attribute features_attr; struct bin_attribute badpages_attr; @@ -334,7 +332,7 @@ struct amdgpu_ras { struct mutex recovery_lock; uint32_t flags; - + bool reboot; struct amdgpu_ras_eeprom_control eeprom_control; }; @@ -347,15 +345,14 @@ struct ras_err_data { unsigned long ue_count; unsigned long ce_count; unsigned long err_addr_cnt; - uint64_t *err_addr; + struct eeprom_table_record *err_addr; }; struct ras_err_handler_data { - /* point to bad pages array */ - struct { - unsigned long bp; - struct amdgpu_bo *bo; - } *bps; + /* point to bad page records array */ + struct eeprom_table_record *bps; + /* point to reserved bo array */ + struct amdgpu_bo **bps_bo; /* the count of entries */ int count; /* the space can place new entries */ @@ -365,7 +362,7 @@ struct ras_err_handler_data { }; typedef int (*ras_ih_cb)(struct amdgpu_device *adev, - struct ras_err_data *err_data, + void *err_data, struct amdgpu_iv_entry *entry); struct ras_ih_data { @@ -481,6 +478,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, return ras && (ras->supported & (1 << block)); } +int amdgpu_ras_recovery_init(struct amdgpu_device *adev); int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, unsigned int block); @@ -492,7 +490,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - unsigned long *bps, int pages); + struct eeprom_table_record *bps, int pages); int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); @@ -501,6 +499,12 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + /* save bad page to eeprom before gpu reset, + * i2c may be unstable in gpu reset + */ + if (in_task()) + amdgpu_ras_reserve_bad_pages(adev); + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) schedule_work(&ras->recovery_work); return 0; @@ -566,6 +570,13 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { int amdgpu_ras_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); +int amdgpu_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_fs_if *fs_info, + struct ras_ih_if *ih_info); +void amdgpu_ras_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block, + struct ras_ih_if *ih_info); int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); @@ -599,4 +610,14 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); + +extern atomic_t amdgpu_ras_in_intr; + +static inline bool amdgpu_ras_intr_triggered(void) +{ + return !!atomic_read(&amdgpu_ras_in_intr); +} + +void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 8a32b5c93778..20af0a17d00b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -100,7 +100,101 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, return ret; } -static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control); + + +static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) +{ + int i; + uint32_t tbl_sum = 0; + + /* Header checksum, skip checksum field in the calculation */ + for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) + tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); + + return tbl_sum; +} + +static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, + int num) +{ + int i, j; + uint32_t tbl_sum = 0; + + /* Records checksum */ + for (i = 0; i < num; i++) { + struct eeprom_table_record *record = &records[i]; + + for (j = 0; j < sizeof(*record); j++) { + tbl_sum += *(((unsigned char *)record) + j); + } + } + + return tbl_sum; +} + +static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num) +{ + return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); +} + +/* Checksum = 256 -((sum of all table entries) mod 256) */ +static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num, + uint32_t old_hdr_byte_sum) +{ + /* + * This will update the table sum with new records. + * + * TODO: What happens when the EEPROM table is to be wrapped around + * and old records from start will get overridden. + */ + + /* need to recalculate updated header byte sum */ + control->tbl_byte_sum -= old_hdr_byte_sum; + control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); + + control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); +} + +/* table sum mod 256 + checksum must equals 256 */ +static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, int num) +{ + control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); + + if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { + DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); + return false; + } + + return true; +} + +int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) +{ + unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + int ret = 0; + + mutex_lock(&control->tbl_mutex); + + hdr->header = EEPROM_TABLE_HDR_VAL; + hdr->version = EEPROM_TABLE_VER; + hdr->first_rec_offset = EEPROM_RECORD_START; + hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; + + control->tbl_byte_sum = 0; + __update_tbl_checksum(control, NULL, 0, 0); + control->next_addr = EEPROM_RECORD_START; + + ret = __update_table_header(control, buff); + + mutex_unlock(&control->tbl_mutex); + + return ret; + +} int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) { @@ -143,25 +237,18 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) if (hdr->header == EEPROM_TABLE_HDR_VAL) { control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE; + control->tbl_byte_sum = __calc_hdr_byte_sum(control); + control->next_addr = EEPROM_RECORD_START; + DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", control->num_recs); } else { DRM_INFO("Creating new EEPROM table"); - hdr->header = EEPROM_TABLE_HDR_VAL; - hdr->version = EEPROM_TABLE_VER; - hdr->first_rec_offset = EEPROM_RECORD_START; - hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; - - adev->psp.ras.ras->eeprom_control.tbl_byte_sum = - __calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control); - ret = __update_table_header(control, buff); + ret = amdgpu_ras_eeprom_reset_table(control); } - /* Start inserting records from here */ - adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START; - return ret == 1 ? 0 : -EIO; } @@ -226,8 +313,8 @@ static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *co record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); i += 6; - buff[i++] = record->mem_channel; - buff[i++] = record->mcumc_id; + record->mem_channel = buff[i++]; + record->mcumc_id = buff[i++]; memcpy(&tmp, buff + i, 6); record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); @@ -266,84 +353,15 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) return curr_address; } - -static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) -{ - int i; - uint32_t tbl_sum = 0; - - /* Header checksum, skip checksum field in the calculation */ - for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) - tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); - - return tbl_sum; -} - -static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, - int num) -{ - int i, j; - uint32_t tbl_sum = 0; - - /* Records checksum */ - for (i = 0; i < num; i++) { - struct eeprom_table_record *record = &records[i]; - - for (j = 0; j < sizeof(*record); j++) { - tbl_sum += *(((unsigned char *)record) + j); - } - } - - return tbl_sum; -} - -static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num) -{ - return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); -} - -/* Checksum = 256 -((sum of all table entries) mod 256) */ -static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num, - uint32_t old_hdr_byte_sum) -{ - /* - * This will update the table sum with new records. - * - * TODO: What happens when the EEPROM table is to be wrapped around - * and old records from start will get overridden. - */ - - /* need to recalculate updated header byte sum */ - control->tbl_byte_sum -= old_hdr_byte_sum; - control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); - - control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); -} - -/* table sum mod 256 + checksum must equals 256 */ -static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num) -{ - control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); - - if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { - DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); - return false; - } - - return true; -} - int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, bool write, int num) { int i, ret = 0; - struct i2c_msg *msgs; - unsigned char *buffs; + struct i2c_msg *msgs, *msg; + unsigned char *buffs, *buff; + struct eeprom_table_record *record; struct amdgpu_device *adev = to_amdgpu_device(control); if (adev->asic_type != CHIP_VEGA20) @@ -373,9 +391,9 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, * 256b */ for (i = 0; i < num; i++) { - unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - struct eeprom_table_record *record = &records[i]; - struct i2c_msg *msg = &msgs[i]; + buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + record = &records[i]; + msg = &msgs[i]; control->next_addr = __correct_eeprom_dest_address(control->next_addr); @@ -415,8 +433,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, if (!write) { for (i = 0; i < num; i++) { - unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - struct eeprom_table_record *record = &records[i]; + buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; + record = &records[i]; __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 41f3fcb9a29b..622269957c1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -79,6 +79,7 @@ struct eeprom_table_record { int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control); +int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 5c13c503e61f..6010999d9020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_sdma.h" +#include "amdgpu_ras.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -83,3 +84,101 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, return csa_mc_addr; } + +int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, + void *ras_ih_info) +{ + int r, i; + struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; + struct ras_fs_if fs_info = { + .sysfs_name = "sdma_err_count", + .debugfs_name = "sdma_err_inject", + }; + + if (!ih_info) + return -EINVAL; + + if (!adev->sdma.ras_if) { + adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->sdma.ras_if) + return -ENOMEM; + adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; + adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->sdma.ras_if->sub_block_index = 0; + strcpy(adev->sdma.ras_if->name, "sdma"); + } + fs_info.head = ih_info->head = *adev->sdma.ras_if; + + r = amdgpu_ras_late_init(adev, adev->sdma.ras_if, + &fs_info, ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (r) + goto late_fini; + } + } else { + r = 0; + goto free; + } + + return 0; + +late_fini: + amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); +free: + kfree(adev->sdma.ras_if); + adev->sdma.ras_if = NULL; + return r; +} + +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && + adev->sdma.ras_if) { + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + /* the cb member will not be used by + * amdgpu_ras_interrupt_remove_handler, init it only + * to cheat the check in ras_late_fini + */ + .cb = amdgpu_sdma_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_reset_gpu(adev, 0); + + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index a9ae0d8a0589..761ff8be6314 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -104,4 +104,13 @@ struct amdgpu_sdma_instance * amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); +int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, + void *ras_ih_info); +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); +int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry); +int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 77674a7b9616..8227ebd0f511 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -323,14 +323,15 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint64_t flags), - TP_ARGS(pe, addr, count, incr, flags), + uint32_t incr, uint64_t flags, bool direct), + TP_ARGS(pe, addr, count, incr, flags, direct), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) __field(u64, flags) + __field(bool, direct) ), TP_fast_assign( @@ -339,28 +340,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->count = count; __entry->incr = incr; __entry->flags = flags; + __entry->direct = direct; ), - TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u", - __entry->pe, __entry->addr, __entry->incr, - __entry->flags, __entry->count) + TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, " + "direct=%d", __entry->pe, __entry->addr, __entry->incr, + __entry->flags, __entry->count, __entry->direct) ); TRACE_EVENT(amdgpu_vm_copy_ptes, - TP_PROTO(uint64_t pe, uint64_t src, unsigned count), - TP_ARGS(pe, src, count), + TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct), + TP_ARGS(pe, src, count, direct), TP_STRUCT__entry( __field(u64, pe) __field(u64, src) __field(u32, count) + __field(bool, direct) ), TP_fast_assign( __entry->pe = pe; __entry->src = src; __entry->count = count; + __entry->direct = direct; ), - TP_printk("pe=%010Lx, src=%010Lx, count=%u", - __entry->pe, __entry->src, __entry->count) + TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d", + __entry->pe, __entry->src, __entry->count, + __entry->direct) ); TRACE_EVENT(amdgpu_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8e867b8b432f..0976fffd3fee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -54,6 +54,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" +#include "amdgpu_ras.h" #include "bif/bif_4_1_d.h" static int amdgpu_map_buffer(struct ttm_buffer_object *bo, @@ -1634,81 +1635,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) */ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_param bp; - int r = 0; - int i; - u64 vram_size = adev->gmc.visible_vram_size; - u64 offset = adev->fw_vram_usage.start_offset; - u64 size = adev->fw_vram_usage.size; - struct amdgpu_bo *bo; - - memset(&bp, 0, sizeof(bp)); - bp.size = adev->fw_vram_usage.size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; + uint64_t vram_size = adev->gmc.visible_vram_size; + adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; - if (adev->fw_vram_usage.size > 0 && - adev->fw_vram_usage.size <= vram_size) { - - r = amdgpu_bo_create(adev, &bp, - &adev->fw_vram_usage.reserved_bo); - if (r) - goto error_create; - - r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); - if (r) - goto error_reserve; - - /* remove the original mem node and create a new one at the - * request position - */ - bo = adev->fw_vram_usage.reserved_bo; - offset = ALIGN(offset, PAGE_SIZE); - for (i = 0; i < bo->placement.num_placement; ++i) { - bo->placements[i].fpfn = offset >> PAGE_SHIFT; - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } - - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, - &bo->tbo.mem, &ctx); - if (r) - goto error_pin; - - r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, - AMDGPU_GEM_DOMAIN_VRAM, - adev->fw_vram_usage.start_offset, - (adev->fw_vram_usage.start_offset + - adev->fw_vram_usage.size)); - if (r) - goto error_pin; - r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, - &adev->fw_vram_usage.va); - if (r) - goto error_kmap; - - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); - } - return r; + if (adev->fw_vram_usage.size == 0 || + adev->fw_vram_usage.size > vram_size) + return 0; -error_kmap: - amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); -error_pin: - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); -error_reserve: - amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); -error_create: - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; - return r; + return amdgpu_bo_create_kernel_at(adev, + adev->fw_vram_usage.start_offset, + adev->fw_vram_usage.size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); } + /** * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1764,6 +1707,17 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) #endif /* + * retired pages will be loaded from eeprom and reserved here, + * it should be called after ttm init since new bo may be created, + * recovery_init may fail, but it can free all resources allocated by + * itself and its failure should not stop amdgpu init process. + * + * Note: theoretically, this should be called before all vram allocations + * to protect retired page from abusing + */ + amdgpu_ras_recovery_init(adev); + + /* *The reserved vram for firmware must be pinned to the specified *place on the VRAM, so reserve it early. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 3a6115ad0196..833fc4b68940 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -360,6 +360,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_RAVEN: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_ARCTURUS: case CHIP_RENOIR: case CHIP_NAVI10: case CHIP_NAVI14: @@ -368,8 +369,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; - case CHIP_ARCTURUS: - return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index b34f00d42049..410587b950f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -108,6 +108,12 @@ struct ta_firmware_header_v1_0 { uint32_t ta_ras_ucode_version; uint32_t ta_ras_offset_bytes; uint32_t ta_ras_size_bytes; + uint32_t ta_hdcp_ucode_version; + uint32_t ta_hdcp_offset_bytes; + uint32_t ta_hdcp_size_bytes; + uint32_t ta_dtm_ucode_version; + uint32_t ta_dtm_offset_bytes; + uint32_t ta_dtm_size_bytes; }; /* version_major=1, version_minor=0 */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c new file mode 100644 index 000000000000..d4fb9cf27e21 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -0,0 +1,158 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_ras.h" + +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_fs_if fs_info = { + .sysfs_name = "umc_err_count", + .debugfs_name = "umc_err_inject", + }; + struct ras_ih_if ih_info = { + .cb = amdgpu_umc_process_ras_data_cb, + }; + + if (!adev->umc.ras_if) { + adev->umc.ras_if = + kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->umc.ras_if) + return -ENOMEM; + adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if->sub_block_index = 0; + strcpy(adev->umc.ras_if->name, "umc"); + } + ih_info.head = fs_info.head = *adev->umc.ras_if; + + r = amdgpu_ras_late_init(adev, adev->umc.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); + if (r) + goto late_fini; + } else { + r = 0; + goto free; + } + + /* ras init of specific umc version */ + if (adev->umc.funcs && adev->umc.funcs->err_cnt_init) + adev->umc.funcs->err_cnt_init(adev); + + return 0; + +late_fini: + amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); +free: + kfree(adev->umc.ras_if); + adev->umc.ras_if = NULL; + return r; +} + +void amdgpu_umc_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && + adev->umc.ras_if) { + struct ras_common_if *ras_if = adev->umc.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_umc_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + /* When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return AMDGPU_RAS_SUCCESS; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, ras_error_status); + + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + DRM_WARN("Failed to alloc memory for umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.funcs->query_ras_error_address(adev, ras_error_status); + } + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) { + if (err_data->err_addr_cnt && + amdgpu_ras_add_bad_pages(adev, err_data->err_addr, + err_data->err_addr_cnt)) + DRM_WARN("Failed to add ras bad page!\n"); + + amdgpu_ras_reset_gpu(adev, 0); + } + + kfree(err_data->err_addr); + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->umc.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 975afa04df09..3283032a78e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -54,7 +54,8 @@ adev->umc.funcs->disable_umc_index_mode(adev); struct amdgpu_umc_funcs { - void (*ras_init)(struct amdgpu_device *adev); + void (*err_cnt_init)(struct amdgpu_device *adev); + int (*ras_late_init)(struct amdgpu_device *adev); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*query_ras_error_address)(struct amdgpu_device *adev, @@ -62,6 +63,7 @@ struct amdgpu_umc_funcs { void (*enable_umc_index_mode)(struct amdgpu_device *adev, uint32_t umc_instance); void (*disable_umc_index_mode)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); }; struct amdgpu_umc { @@ -75,8 +77,17 @@ struct amdgpu_umc { uint32_t channel_offs; /* channel index table of interleaved memory */ const uint32_t *channel_idx_tbl; + struct ras_common_if *ras_if; const struct amdgpu_umc_funcs *funcs; }; +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); +void amdgpu_umc_ras_fini(struct amdgpu_device *adev); +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry); +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5251352f5922..be10e4b9a94d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -130,7 +130,8 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, if (level == adev->vm_manager.root_level) /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift; + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) + >> shift; else if (level != AMDGPU_VM_PTB) /* Everything in between */ return 512; @@ -341,7 +342,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) return container_of(parent->vm_bo, struct amdgpu_vm_pt, base); } -/** +/* * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt */ struct amdgpu_vm_pt_cursor { @@ -482,6 +483,7 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev, * * @adev: amdgpu_device structure * @vm: amdgpu_vm structure + * @start: optional cursor to start with * @cursor: state to initialize * * Starts a deep first traversal of the PD/PT tree. @@ -535,7 +537,7 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, amdgpu_vm_pt_ancestor(cursor); } -/** +/* * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs */ #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ @@ -566,6 +568,14 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } +/** + * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag + * + * @bo: BO which was removed from the LRU + * + * Make sure the bulk_moveable flag is updated when a BO is removed from the + * LRU. + */ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) { struct amdgpu_bo *abo; @@ -693,6 +703,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: VM to clear BO from * @bo: BO to clear + * @direct: use a direct update * * Root PD needs to be reserved when calling this. * @@ -701,7 +712,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo) + struct amdgpu_bo *bo, + bool direct) { struct ttm_operation_ctx ctx = { true, false }; unsigned level = adev->vm_manager.root_level; @@ -760,6 +772,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.direct = direct; r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_KFD, NULL); if (r) @@ -813,10 +826,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requesting vm + * @level: the page table level + * @direct: use a direct update * @bp: resulting BO allocation parameters */ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int level, struct amdgpu_bo_param *bp) + int level, bool direct, + struct amdgpu_bo_param *bp) { memset(bp, 0, sizeof(*bp)); @@ -831,6 +847,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, else if (!vm->root.base.bo || vm->root.base.bo->shadow) bp->flags |= AMDGPU_GEM_CREATE_SHADOW; bp->type = ttm_bo_type_kernel; + bp->no_wait_gpu = direct; if (vm->root.base.bo) bp->resv = vm->root.base.bo->tbo.base.resv; } @@ -841,6 +858,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, * @adev: amdgpu_device pointer * @vm: VM to allocate page tables for * @cursor: Which page table to allocate + * @direct: use a direct update * * Make sure a specific page table or directory is allocated. * @@ -850,7 +868,8 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor) + struct amdgpu_vm_pt_cursor *cursor, + bool direct) { struct amdgpu_vm_pt *entry = cursor->entry; struct amdgpu_bo_param bp; @@ -871,7 +890,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (entry->base.bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, &bp); + amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp); r = amdgpu_bo_create(adev, &bp, &pt); if (r) @@ -883,7 +902,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); amdgpu_vm_bo_base_init(&entry->base, vm, pt); - r = amdgpu_vm_clear_bo(adev, vm, pt); + r = amdgpu_vm_clear_bo(adev, vm, pt, direct); if (r) goto error_free_pt; @@ -1020,7 +1039,8 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, * Returns: * 0 on success, errno otherwise. */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, + bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -1034,10 +1054,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; - bool pasid_mapping_needed = id->pasid != job->pasid || - !id->pasid_mapping || - !dma_fence_is_signaled(id->pasid_mapping); struct dma_fence *fence = NULL; + bool pasid_mapping_needed = false; unsigned patch_offset = 0; int r; @@ -1047,6 +1065,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ pasid_mapping_needed = true; } + mutex_lock(&id_mgr->lock); + if (id->pasid != job->pasid || !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping)) + pasid_mapping_needed = true; + mutex_unlock(&id_mgr->lock); + gds_switch_needed &= !!ring->funcs->emit_gds_switch; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; @@ -1086,9 +1110,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ } if (pasid_mapping_needed) { + mutex_lock(&id_mgr->lock); id->pasid = job->pasid; dma_fence_put(id->pasid_mapping); id->pasid_mapping = dma_fence_get(fence); + mutex_unlock(&id_mgr->lock); } dma_fence_put(fence); @@ -1172,10 +1198,10 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } -/* +/** * amdgpu_vm_update_pde - update a single level in the hierarchy * - * @param: parameters for the update + * @params: parameters for the update * @vm: requested vm * @entry: entry to update * @@ -1199,7 +1225,7 @@ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags); } -/* +/** * amdgpu_vm_invalidate_pds - mark all PDs as invalid * * @adev: amdgpu_device pointer @@ -1218,19 +1244,20 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, amdgpu_vm_bo_relocated(&entry->base); } -/* - * amdgpu_vm_update_directories - make sure that all directories are valid +/** + * amdgpu_vm_update_pdes - make sure that all directories are valid * * @adev: amdgpu_device pointer * @vm: requested vm + * @direct: submit directly to the paging queue * * Makes sure all directories are up to date. * * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_update_directories(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct) { struct amdgpu_vm_update_params params; int r; @@ -1241,6 +1268,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.direct = direct; r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_VM, NULL); if (r) @@ -1268,7 +1296,7 @@ error: return r; } -/** +/* * amdgpu_vm_update_flags - figure out flags for PTE updates * * Make sure to set the right flags for the PTEs at the desired level. @@ -1391,7 +1419,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; - r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor); + r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, + params->direct); if (r) return r; @@ -1482,13 +1511,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer - * @exclusive: fence we need to sync to - * @pages_addr: DMA addresses to use for mapping * @vm: requested vm + * @direct: direct submission in a page fault + * @exclusive: fence we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @addr: addr to set the area to + * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence * * Fill in the page table entries between @start and @last. @@ -1497,11 +1527,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct, struct dma_fence *exclusive, - dma_addr_t *pages_addr, - struct amdgpu_vm *vm, uint64_t start, uint64_t last, uint64_t flags, uint64_t addr, + dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_update_params params; @@ -1511,6 +1541,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.direct = direct; params.pages_addr = pages_addr; /* sync to everything except eviction fences on unmapping */ @@ -1569,27 +1600,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) flags &= ~AMDGPU_PTE_WRITEABLE; - flags &= ~AMDGPU_PTE_EXECUTABLE; - flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - - if (adev->asic_type >= CHIP_NAVI10) { - flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; - flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); - } else { - flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK); - } - - if ((mapping->flags & AMDGPU_PTE_PRT) && - (adev->asic_type >= CHIP_VEGA10)) { - flags |= AMDGPU_PTE_PRT; - if (adev->asic_type >= CHIP_NAVI10) { - flags |= AMDGPU_PTE_SNOOPED; - flags |= AMDGPU_PTE_LOG; - flags |= AMDGPU_PTE_SYSTEM; - } - flags &= ~AMDGPU_PTE_VALID; - } + /* Apply ASIC specific mapping flags */ + amdgpu_gmc_get_vm_pte(adev, mapping, &flags); trace_amdgpu_vm_bo_update(mapping); @@ -1633,7 +1645,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, dma_addr = pages_addr; } else { addr = pages_addr[pfn]; - max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE; + max_entries = count * + AMDGPU_GPU_PAGES_IN_CPU_PAGE; } } else if (flags & AMDGPU_PTE_VALID) { @@ -1642,9 +1655,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive, start, last, flags, addr, - fence); + dma_addr, fence); if (r) return r; @@ -1672,8 +1685,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * Returns: * 0 for success, -EINVAL for failure. */ -int amdgpu_vm_bo_update(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va, +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; @@ -1700,7 +1712,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = dma_resv_get_excl(bo->tbo.base.resv); + exclusive = bo->tbo.moving; } if (bo) { @@ -1731,12 +1743,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, return r; } - if (vm->use_cpu_for_update) { - /* Flush HDP */ - mb(); - amdgpu_asic_flush_hdp(adev, NULL); - } - /* If the BO is not in its preferred location add it back to * the evicted list so that it gets validated again on the * next command submission. @@ -1744,7 +1750,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { uint32_t mem_type = bo->tbo.mem.mem_type; - if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) + if (!(bo->preferred_domains & + amdgpu_mem_type_to_domain(mem_type))) amdgpu_vm_bo_evicted(&bo_va->base); else amdgpu_vm_bo_idle(&bo_va->base); @@ -1938,9 +1945,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL, mapping->start, mapping->last, - init_pte_value, 0, &f); + init_pte_value, 0, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2682,12 +2689,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); - /* create scheduler entity for page table updates */ - r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs, + /* create scheduler entities for page table updates */ + r = drm_sched_entity_init(&vm->direct, adev->vm_manager.vm_pte_rqs, adev->vm_manager.vm_pte_num_rqs, NULL); if (r) return r; + r = drm_sched_entity_init(&vm->delayed, adev->vm_manager.vm_pte_rqs, + adev->vm_manager.vm_pte_num_rqs, NULL); + if (r) + goto error_free_direct; + vm->pte_support_ats = false; if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { @@ -2702,7 +2714,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && + !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->use_cpu_for_update) @@ -2711,12 +2724,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; - amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); + amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; r = amdgpu_bo_create(adev, &bp, &root); if (r) - goto error_free_sched_entity; + goto error_free_delayed; r = amdgpu_bo_reserve(root, true); if (r) @@ -2728,7 +2741,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_vm_bo_base_init(&vm->root.base, vm, root); - r = amdgpu_vm_clear_bo(adev, vm, root); + r = amdgpu_vm_clear_bo(adev, vm, root, false); if (r) goto error_unreserve; @@ -2759,8 +2772,11 @@ error_free_root: amdgpu_bo_unref(&vm->root.base.bo); vm->root.base.bo = NULL; -error_free_sched_entity: - drm_sched_entity_destroy(&vm->entity); +error_free_delayed: + drm_sched_entity_destroy(&vm->delayed); + +error_free_direct: + drm_sched_entity_destroy(&vm->direct); return r; } @@ -2801,6 +2817,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @pasid: pasid to use * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. @@ -2816,7 +2833,8 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * Returns: * 0 for success, -errno for errors. */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned int pasid) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; @@ -2848,7 +2866,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns */ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo); + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false); if (r) goto free_idr; } @@ -2858,7 +2876,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns AMDGPU_VM_USE_CPU_FOR_COMPUTE); DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), + WARN_ONCE((vm->use_cpu_for_update && + !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->use_cpu_for_update) @@ -2937,19 +2956,28 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; - int i, r; + int i; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); + root = amdgpu_bo_ref(vm->root.base.bo); + amdgpu_bo_reserve(root, true); if (vm->pasid) { unsigned long flags; spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + vm->pasid = 0; } - drm_sched_entity_destroy(&vm->entity); + amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_bo_unreserve(root); + amdgpu_bo_unref(&root); + WARN_ON(vm->root.base.bo); + + drm_sched_entity_destroy(&vm->direct); + drm_sched_entity_destroy(&vm->delayed); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -2972,16 +3000,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - root = amdgpu_bo_ref(vm->root.base.bo); - r = amdgpu_bo_reserve(root, true); - if (r) { - dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); - } else { - amdgpu_vm_free_pts(adev, vm, NULL); - amdgpu_bo_unreserve(root); - } - amdgpu_bo_unref(&root); - WARN_ON(vm->root.base.bo); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vmid_free_reserved(adev, vm, i); @@ -3065,8 +3083,9 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: - /* current, we only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); + /* We only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, + AMDGPU_GFXHUB_0); if (r) return r; break; @@ -3109,13 +3128,88 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, */ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) { - if (!vm->task_info.pid) { - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); + if (vm->task_info.pid) + return; - if (current->group_leader->mm == current->mm) { - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); - } + vm->task_info.pid = current->pid; + get_task_comm(vm->task_info.task_name, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info.tgid = current->group_leader->pid; + get_task_comm(vm->task_info.process_name, current->group_leader); +} + +/** + * amdgpu_vm_handle_fault - graceful handling of VM faults. + * @adev: amdgpu device pointer + * @pasid: PASID of the VM + * @addr: Address of the fault + * + * Try to gracefully handle a VM fault. Return true if the fault was handled and + * shouldn't be reported any more. + */ +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr) +{ + struct amdgpu_bo *root; + uint64_t value, flags; + struct amdgpu_vm *vm; + long r; + + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + if (vm) + root = amdgpu_bo_ref(vm->root.base.bo); + else + root = NULL; + spin_unlock(&adev->vm_manager.pasid_lock); + + if (!root) + return false; + + r = amdgpu_bo_reserve(root, true); + if (r) + goto error_unref; + + /* Double check that the VM still exists */ + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + if (vm && vm->root.base.bo != root) + vm = NULL; + spin_unlock(&adev->vm_manager.pasid_lock); + if (!vm) + goto error_unlock; + + addr /= AMDGPU_GPU_PAGE_SIZE; + flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | + AMDGPU_PTE_SYSTEM; + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { + /* Redirect the access to the dummy page */ + value = adev->dummy_page_addr; + flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | + AMDGPU_PTE_WRITEABLE; + } else { + /* Let the hw retry silently on the PTE */ + value = 0; } + + r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1, + flags, value, NULL, NULL); + if (r) + goto error_unlock; + + r = amdgpu_vm_update_pdes(adev, vm, true); + +error_unlock: + amdgpu_bo_unreserve(root); + if (r < 0) + DRM_ERROR("Can't handle page fault (%ld)\n", r); + +error_unref: + amdgpu_bo_unref(&root); + + return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2eda3a8c330d..4dbbe1b6b413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -99,6 +99,9 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 +/* Reserve 4MB VRAM for page tables */ +#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20) + /* max number of VMHUB */ #define AMDGPU_MAX_VMHUBS 3 #define AMDGPU_GFXHUB_0 0 @@ -199,6 +202,11 @@ struct amdgpu_vm_update_params { struct amdgpu_vm *vm; /** + * @direct: if changes should be made directly + */ + bool direct; + + /** * @pages_addr: * * DMA addresses to use for mapping @@ -254,8 +262,9 @@ struct amdgpu_vm { struct amdgpu_vm_pt root; struct dma_fence *last_update; - /* Scheduler entity for page table updates */ - struct drm_sched_entity entity; + /* Scheduler entities for page table updates */ + struct drm_sched_entity direct; + struct drm_sched_entity delayed; unsigned int pasid; /* dedicated to vm */ @@ -357,8 +366,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); -int amdgpu_vm_update_directories(struct amdgpu_device *adev, - struct amdgpu_vm *vm); +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, + struct amdgpu_vm *vm, bool direct); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); @@ -404,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info); +bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 5222d165abfc..73fec7a0ced5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -49,13 +49,6 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, { int r; - /* Wait for PT BOs to be idle. PTs share the same resv. object - * as the root PD BO - */ - r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); - if (unlikely(r)) - return r; - /* Wait for any BO move to be completed */ if (exclusive) { r = dma_fence_wait(exclusive, true); @@ -63,7 +56,14 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, return r; } - return 0; + /* Don't wait for submissions during page fault */ + if (p->direct) + return 0; + + /* Wait for PT BOs to be idle. PTs share the same resv. object + * as the root PD BO + */ + return amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); } /** @@ -89,7 +89,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, pe += (unsigned long)amdgpu_bo_kptr(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); for (i = 0; i < count; i++) { value = p->pages_addr ? diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 61fc584cbb1a..832db59f441e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -68,17 +68,19 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, if (r) return r; + p->num_dw_left = ndw; + + /* Wait for moves to be completed */ r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); if (r) return r; - r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, - owner, false); - if (r) - return r; + /* Don't wait for any submissions during page fault handling */ + if (p->direct) + return 0; - p->num_dw_left = ndw; - return 0; + return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, + owner, false); } /** @@ -95,22 +97,23 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, { struct amdgpu_bo *root = p->vm->root.base.bo; struct amdgpu_ib *ib = p->job->ibs; + struct drm_sched_entity *entity; struct amdgpu_ring *ring; struct dma_fence *f; int r; - ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched); + entity = p->direct ? &p->vm->direct : &p->vm->delayed; + ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); WARN_ON(ib->length_dw > p->num_dw_left); - r = amdgpu_job_submit(p->job, &p->vm->entity, - AMDGPU_FENCE_OWNER_VM, &f); + r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f); if (r) goto error; amdgpu_bo_fence(root, f, true); - if (fence) + if (fence && !p->direct) swap(*fence, f); dma_fence_put(f); return 0; @@ -120,7 +123,6 @@ error: return r; } - /** * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping * @@ -141,7 +143,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, src += p->num_dw_left * 4; pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_copy_ptes(pe, src, count); + trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct); amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); } @@ -168,7 +170,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, struct amdgpu_ib *ib = p->job->ibs; pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); if (count < 3) { amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, count, incr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 3a9d8c15fe9f..82a3299e53c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -23,6 +23,9 @@ */ #include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_atomfirmware.h" +#include "atom.h" struct amdgpu_vram_mgr { struct drm_mm mm; @@ -101,6 +104,39 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM])); } +static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + switch (adev->gmc.vram_vendor) { + case SAMSUNG: + return snprintf(buf, PAGE_SIZE, "samsung\n"); + case INFINEON: + return snprintf(buf, PAGE_SIZE, "infineon\n"); + case ELPIDA: + return snprintf(buf, PAGE_SIZE, "elpida\n"); + case ETRON: + return snprintf(buf, PAGE_SIZE, "etron\n"); + case NANYA: + return snprintf(buf, PAGE_SIZE, "nanya\n"); + case HYNIX: + return snprintf(buf, PAGE_SIZE, "hynix\n"); + case MOSEL: + return snprintf(buf, PAGE_SIZE, "mosel\n"); + case WINBOND: + return snprintf(buf, PAGE_SIZE, "winbond\n"); + case ESMT: + return snprintf(buf, PAGE_SIZE, "esmt\n"); + case MICRON: + return snprintf(buf, PAGE_SIZE, "micron\n"); + default: + return snprintf(buf, PAGE_SIZE, "unknown\n"); + } +} + static DEVICE_ATTR(mem_info_vram_total, S_IRUGO, amdgpu_mem_info_vram_total_show, NULL); static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO, @@ -109,6 +145,8 @@ static DEVICE_ATTR(mem_info_vram_used, S_IRUGO, amdgpu_mem_info_vram_used_show, NULL); static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, amdgpu_mem_info_vis_vram_used_show, NULL); +static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO, + amdgpu_mem_info_vram_vendor, NULL); /** * amdgpu_vram_mgr_init - init VRAM manager and DRM MM @@ -154,6 +192,11 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); return ret; } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vram_vendor\n"); + return ret; + } return 0; } @@ -180,6 +223,7 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total); device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); + device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor); return 0; } @@ -275,7 +319,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t vis_usage = 0, mem_bytes; + uint64_t vis_usage = 0, mem_bytes, max_bytes; unsigned i; int r; @@ -283,9 +327,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (!lpfn) lpfn = man->size; + max_bytes = adev->gmc.mc_vram_size; + if (tbo->type != ttm_bo_type_kernel) + max_bytes -= AMDGPU_VM_RESERVED_VRAM; + /* bail out quickly if there's likely not enough VRAM for this BO */ mem_bytes = (u64)mem->num_pages << PAGE_SHIFT; - if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) { + if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) { atomic64_sub(mem_bytes, &mgr->usage); mem->mm_node = NULL; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 65aae75f80fd..00371713c671 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "amdgpu_smu.h" +#include "amdgpu_ras.h" #include "df/df_3_6_offset.h" static DEFINE_MUTEX(xgmi_mutex); @@ -437,3 +438,52 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) mutex_unlock(&hive->hive_lock); } } + +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "xgmi_wafl_err_count", + .debugfs_name = "xgmi_wafl_err_inject", + }; + + if (!adev->gmc.xgmi.supported || + adev->gmc.xgmi.num_physical_nodes == 0) + return 0; + + if (!adev->gmc.xgmi.ras_if) { + adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gmc.xgmi.ras_if) + return -ENOMEM; + adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; + adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gmc.xgmi.ras_if->sub_block_index = 0; + strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); + } + ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; + r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) { + kfree(adev->gmc.xgmi.ras_if); + adev->gmc.xgmi.ras_if = NULL; + } + + return r; +} + +void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && + adev->gmc.xgmi.ras_if) { + struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index fbcee31788c4..bbf504ff7051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -42,6 +42,8 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev); +void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c index 4853899b1824..fda99c958c3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c @@ -24,7 +24,6 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "arct_ip_offset.h" int arct_reg_base_init(struct amdgpu_device *adev) @@ -52,6 +51,8 @@ int arct_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); + adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 645550e7caf5..40d2ac723dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -330,9 +330,11 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -368,6 +370,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -382,9 +385,11 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -397,6 +402,7 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1219,10 +1225,12 @@ static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1230,12 +1238,14 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1261,10 +1271,12 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1273,12 +1285,14 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1313,10 +1327,12 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1339,12 +1355,14 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1352,10 +1370,10 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index d9f470632b2c..898ef72d423c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -348,9 +348,11 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -385,6 +387,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -399,9 +402,11 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -413,6 +418,7 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1245,10 +1251,12 @@ static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; int interlace = 0; @@ -1256,12 +1264,14 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1287,10 +1297,12 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp; u8 *sadb = NULL; @@ -1299,12 +1311,14 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1339,10 +1353,12 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1365,12 +1381,14 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) if (!dig || !dig->afmt || !dig->afmt->pin) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1378,10 +1396,10 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3eb2e7429269..db15a112becc 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -281,9 +281,11 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -309,7 +311,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } - + drm_connector_list_iter_end(&iter); } /** @@ -324,9 +326,11 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -338,6 +342,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1124,20 +1129,24 @@ static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int interlace = 0; u32 tmp; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1164,21 +1173,25 @@ static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u8 *sadb = NULL; int sad_count; u32 tmp; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1221,10 +1234,12 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1244,12 +1259,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1257,10 +1274,10 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { u32 tmp = 0; @@ -1632,6 +1649,7 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); int bpc = 8; @@ -1639,12 +1657,14 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, if (!dig || !dig->afmt) return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a16c5e9e610e..f06c9022c1fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -275,9 +275,11 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -303,6 +305,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } /** @@ -317,9 +320,11 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) { struct drm_device *dev = adev->ddev; struct drm_connector *connector; + struct drm_connector_list_iter iter; u32 tmp; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) @@ -331,6 +336,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } + drm_connector_list_iter_end(&iter); } static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev) @@ -1157,10 +1163,12 @@ static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder) static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 tmp = 0, offset; @@ -1169,12 +1177,14 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1214,10 +1224,12 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder, static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; u32 offset, tmp; u8 *sadb = NULL; @@ -1228,12 +1240,14 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1263,11 +1277,13 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) { - struct amdgpu_device *adev = encoder->dev->dev_private; + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; u32 offset; struct drm_connector *connector; + struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; struct cea_sad *sads; int i, sad_count; @@ -1292,12 +1308,14 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) offset = dig->afmt->pin->offset; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { amdgpu_connector = to_amdgpu_connector(connector); break; } } + drm_connector_list_iter_end(&iter); if (!amdgpu_connector) { DRM_ERROR("Couldn't find encoder's connector\n"); @@ -1305,10 +1323,10 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); - if (sad_count <= 0) { + if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + if (sad_count <= 0) return; - } BUG_ON(!sads); for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 844c03868248..d6221298b477 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -33,6 +33,10 @@ static void df_v1_7_sw_init(struct amdgpu_device *adev) { } +static void df_v1_7_sw_fini(struct amdgpu_device *adev) +{ +} + static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, bool enable) { @@ -111,6 +115,7 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, const struct amdgpu_df_funcs df_v1_7_funcs = { .sw_init = df_v1_7_sw_init, + .sw_fini = df_v1_7_sw_fini, .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, .get_fb_channel_number = df_v1_7_get_fb_channel_number, .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 5850c8e34caa..16fbd2bc8ad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -99,8 +99,8 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, unsigned long flags, address, data; uint32_t ficadl_val, ficadh_val; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); @@ -122,8 +122,8 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); @@ -150,8 +150,8 @@ static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, lo_addr); @@ -172,8 +172,8 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, lo_addr); @@ -220,6 +220,13 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev) adev->df_perfmon_config_assign_mask[i] = 0; } +static void df_v3_6_sw_fini(struct amdgpu_device *adev) +{ + + device_remove_file(adev->dev, &dev_attr_df_cntr_avail); + +} + static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, bool enable) { @@ -537,6 +544,7 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, const struct amdgpu_df_funcs df_v3_6_funcs = { .sw_init = df_v3_6_sw_init, + .sw_fini = df_v3_6_sw_fini, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, .get_fb_channel_number = df_v3_6_get_fb_channel_number, .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 957811b73672..0fcea94f0a4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -127,7 +127,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000) }; static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = @@ -171,7 +171,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000), }; static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = @@ -2443,7 +2443,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2513,7 +2513,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -2582,7 +2582,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2903,7 +2903,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -4357,7 +4357,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -4377,8 +4377,8 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - adev->nbio_funcs->get_hdp_flush_req_offset(adev), - adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index dcadc73bffd2..9c63961515b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -517,9 +517,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = @@ -582,9 +582,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = @@ -676,9 +676,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = @@ -1324,7 +1324,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, /* TODO: Determine if MEC2 JT FW loading can be removed for all GFX V9 asic and above */ - if (adev->asic_type != CHIP_ARCTURUS) { + if (adev->asic_type != CHIP_ARCTURUS && + adev->asic_type != CHIP_RENOIR) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; info->fw = adev->gfx.mec2_fw; @@ -1956,190 +1957,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) return 0; } -static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, - struct amdgpu_ngg_buf *ngg_buf, - int size_se, - int default_size_se) -{ - int r; - - if (size_se < 0) { - dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); - return -EINVAL; - } - size_se = size_se ? size_se : default_size_se; - - ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; - r = amdgpu_bo_create_kernel(adev, ngg_buf->size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &ngg_buf->bo, - &ngg_buf->gpu_addr, - NULL); - if (r) { - dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); - return r; - } - ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); - - return r; -} - -static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < NGG_BUF_MAX; i++) - amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, - &adev->gfx.ngg.buf[i].gpu_addr, - NULL); - - memset(&adev->gfx.ngg.buf[0], 0, - sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); - - adev->gfx.ngg.init = false; - - return 0; -} - -static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) -{ - int r; - - if (!amdgpu_ngg || adev->gfx.ngg.init == true) - return 0; - - /* GDS reserve memory: 64 bytes alignment */ - adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); - adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; - adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); - adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); - - /* Primitive Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], - amdgpu_prim_buf_per_se, - 64 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Primitive Buffer\n"); - goto err; - } - - /* Position Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], - amdgpu_pos_buf_per_se, - 256 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Position Buffer\n"); - goto err; - } - - /* Control Sideband */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], - amdgpu_cntl_sb_buf_per_se, - 256); - if (r) { - dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); - goto err; - } - - /* Parameter Cache, not created by default */ - if (amdgpu_param_buf_per_se <= 0) - goto out; - - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], - amdgpu_param_buf_per_se, - 512 * 1024); - if (r) { - dev_err(adev->dev, "Failed to create Parameter Cache\n"); - goto err; - } - -out: - adev->gfx.ngg.init = true; - return 0; -err: - gfx_v9_0_ngg_fini(adev); - return r; -} - -static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; - int r; - u32 data, base; - - if (!amdgpu_ngg) - return 0; - - /* Program buffer size */ - data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, - adev->gfx.ngg.buf[NGG_PRIM].size >> 8); - data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, - adev->gfx.ngg.buf[NGG_POS].size >> 8); - WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); - - data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, - adev->gfx.ngg.buf[NGG_CNTL].size >> 8); - data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, - adev->gfx.ngg.buf[NGG_PARAM].size >> 10); - WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); - - /* Program buffer base address */ - base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); - data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); - data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); - - base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); - data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); - data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); - - base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); - data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); - WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); - - base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); - data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); - WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); - - /* Clear GDS reserved memory */ - r = amdgpu_ring_alloc(ring, 17); - if (r) { - DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", - ring->name, r); - return r; - } - - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.gds_size + - adev->gfx.ngg.gds_reserve_size)); - - amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); - amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | - PACKET3_DMA_DATA_DST_SEL(1) | - PACKET3_DMA_DATA_SRC_SEL(2))); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | - adev->gfx.ngg.gds_reserve_size); - - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); - - amdgpu_ring_commit(ring); - - return 0; -} - static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int mec, int pipe, int queue) { @@ -2307,10 +2124,6 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; - r = gfx_v9_0_ngg_init(adev); - if (r) - return r; - return 0; } @@ -2320,19 +2133,7 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && - adev->gfx.ras_if) { - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - }; - - amdgpu_ras_debugfs_remove(adev, ras_if); - amdgpu_ras_sysfs_remove(adev, ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); - amdgpu_ras_feature_enable(adev, ras_if, 0); - kfree(ras_if); - } + amdgpu_gfx_ras_fini(adev); for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -2344,7 +2145,6 @@ static int gfx_v9_0_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); - gfx_v9_0_ngg_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, @@ -3883,12 +3683,6 @@ static int gfx_v9_0_hw_init(void *handle) if (r) return r; - if (adev->asic_type != CHIP_ARCTURUS) { - r = gfx_v9_0_ngg_en(adev); - if (r) - return r; - } - return r; } @@ -4203,6 +3997,10 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; int i, r; + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + r = amdgpu_ring_alloc(ring, 7); if (r) { DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", @@ -4393,33 +4191,14 @@ static int gfx_v9_0_early_init(void *handle) return 0; } -static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, - struct ras_err_data *err_data, - struct amdgpu_iv_entry *entry); - static int gfx_v9_0_ecc_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if **ras_if = &adev->gfx.ras_if; - struct ras_ih_if ih_info = { - .cb = gfx_v9_0_process_ras_data_cb, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "gfx_err_count", - .debugfs_name = "gfx_err_inject", - }; - struct ras_common_if ras_block = { - .block = AMDGPU_RAS_BLOCK__GFX, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "gfx", - }; int r; - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { - amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); - return 0; - } + r = amdgpu_gfx_ras_late_init(adev); + if (r) + return r; r = gfx_v9_0_do_edc_gds_workarounds(adev); if (r) @@ -4430,72 +4209,7 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; - /* handle resume path. */ - if (*ras_if) { - /* resend ras TA enable cmd during resume. - * prepare to handle failure. - */ - ih_info.head = **ras_if; - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - /* request a gpu reset. will run again. */ - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__GFX); - return 0; - } - /* fail to enable ras, cleanup all. */ - goto irq; - } - /* enable successfully. continue. */ - goto resume; - } - - *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); - if (!*ras_if) - return -ENOMEM; - - **ras_if = ras_block; - - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__GFX); - r = 0; - } - goto feature; - } - - ih_info.head = **ras_if; - fs_info.head = **ras_if; - - r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); - if (r) - goto interrupt; - - amdgpu_ras_debugfs_create(adev, &fs_info); - - r = amdgpu_ras_sysfs_create(adev, &fs_info); - if (r) - goto sysfs; -resume: - r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); - if (r) - goto irq; - return 0; -irq: - amdgpu_ras_sysfs_remove(adev, *ras_if); -sysfs: - amdgpu_ras_debugfs_remove(adev, *ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); -interrupt: - amdgpu_ras_feature_enable(adev, *ras_if, 0); -feature: - kfree(*ras_if); - *ras_if = NULL; - return r; } static int gfx_v9_0_late_init(void *handle) @@ -4970,7 +4684,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -4990,8 +4704,8 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - adev->nbio_funcs->get_hdp_flush_req_offset(adev), - adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } @@ -5723,18 +5437,6 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, - struct ras_err_data *err_data, - struct amdgpu_iv_entry *entry) -{ - /* TODO ue will trigger an interrupt. */ - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->gfx.funcs->query_ras_error_count) - adev->gfx.funcs->query_ras_error_count(adev, err_data); - amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_SUCCESS; -} - static const struct { const char *name; uint32_t ip; @@ -6143,25 +5845,6 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, return 0; } -static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; - - if (!ras_if) - return 0; - - ih_data.head = *ras_if; - - DRM_ERROR("CP ECC ERROR IRQ\n"); - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; -} - static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -6325,7 +6008,7 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { .set = gfx_v9_0_set_cp_ecc_error_state, - .process = gfx_v9_0_cp_ecc_error_irq, + .process = amdgpu_gfx_cp_ecc_error_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 6ce37ce77d14..9ec4297e61e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -178,6 +178,8 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 8b789f750b72..b601c6740ef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -46,21 +46,25 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; } -static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) +void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ + int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); - - WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - gfxhub_v2_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -166,6 +170,8 @@ static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h index 06807940748b..392b8cd94fc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h @@ -31,5 +31,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); void gfxhub_v2_0_init(struct amdgpu_device *adev); u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev); +void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 241a4e57cf4a..fb48622c2abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -278,7 +278,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, int r; /* flush hdp cache */ - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); mutex_lock(&adev->mman.gtt_window_lock); @@ -397,43 +397,23 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid * 1 system * 0 valid */ -static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - switch (flags & AMDGPU_VM_MTYPE_MASK) { +static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) +{ + switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); case AMDGPU_VM_MTYPE_NC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); case AMDGPU_VM_MTYPE_WC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); case AMDGPU_VM_MTYPE_CC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); default: - pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); } - - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; } static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, @@ -460,12 +440,32 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, } } +static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; + *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + + if (mapping->flags & AMDGPU_PTE_PRT) { + *flags |= AMDGPU_PTE_PRT; + *flags |= AMDGPU_PTE_SNOOPED; + *flags |= AMDGPU_PTE_LOG; + *flags |= AMDGPU_PTE_SYSTEM; + *flags &= ~AMDGPU_PTE_VALID; + } +} + static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, - .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags, - .get_vm_pde = gmc_v10_0_get_vm_pde + .map_mtype = gmc_v10_0_map_mtype, + .get_vm_pde = gmc_v10_0_get_vm_pde, + .get_vm_pte = gmc_v10_0_get_vm_pte }; static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -519,8 +519,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - if (!amdgpu_sriov_vf(adev)) - base = gfxhub_v2_0_get_fb_location(adev); + base = gfxhub_v2_0_get_fb_location(adev); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc); @@ -540,24 +539,13 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) { - int chansize, numchan; - - if (!amdgpu_emu_mode) - adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); - else { - /* hard code vram_width for emulation */ - chansize = 128; - numchan = 1; - adev->gmc.vram_width = numchan * chansize; - } - /* Could aper size report 0 ? */ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->gmc.mc_vram_size = - adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.real_vram_size = adev->gmc.mc_vram_size; adev->gmc.visible_vram_size = adev->gmc.aper_size; @@ -636,7 +624,7 @@ static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v10_0_sw_init(void *handle) { - int r; + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v2_0_init(adev); @@ -644,7 +632,15 @@ static int gmc_v10_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (!amdgpu_emu_mode) + adev->gmc.vram_width = vram_width; + else + adev->gmc.vram_width = 1 * 128; /* numchan * chansize */ + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: @@ -794,7 +790,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); /* Flush HDP after it is initialized */ - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 9fb1765e92d1..b205039350b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -386,27 +386,20 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -1153,7 +1146,7 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, .set_prt = gmc_v6_0_set_prt, .get_vm_pde = gmc_v6_0_get_vm_pde, - .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags + .get_vm_pte = gmc_v6_0_get_vm_pte, }; static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 0c3d9bc3a641..f08e5330642d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -463,27 +463,20 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -1343,8 +1336,8 @@ static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, .set_prt = gmc_v7_0_set_prt, - .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, - .get_vm_pde = gmc_v7_0_get_vm_pde + .get_vm_pde = gmc_v7_0_get_vm_pde, + .get_vm_pte = gmc_v7_0_get_vm_pte }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ea764dd9245d..6d96d40fbcb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -686,29 +686,21 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, * 0 valid */ -static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; -} - static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } +static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + *flags &= ~AMDGPU_PTE_PRT; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -1711,8 +1703,8 @@ static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, .set_prt = gmc_v8_0_set_prt, - .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, - .get_vm_pde = gmc_v8_0_get_vm_pde + .get_vm_pde = gmc_v8_0_get_vm_pde, + .get_vm_pte = gmc_v8_0_get_vm_pte }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f91337030dc0..defeb6afcb5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -51,10 +51,12 @@ #include "gfxhub_v1_1.h" #include "mmhub_v9_4.h" #include "umc_v6_1.h" +#include "umc_v6_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" #include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d @@ -243,44 +245,6 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, - struct ras_err_data *err_data, - struct amdgpu_iv_entry *entry) -{ - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->umc.funcs->query_ras_error_count) - adev->umc.funcs->query_ras_error_count(adev, err_data); - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - if (adev->umc.funcs->query_ras_error_address) - adev->umc.funcs->query_ras_error_address(adev, err_data); - - /* only uncorrectable error needs gpu reset */ - if (err_data->ue_count) - amdgpu_ras_reset_gpu(adev, 0); - - return AMDGPU_RAS_SUCCESS; -} - -static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->gmc.umc_ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; - - if (!ras_if) - return 0; - - ih_data.head = *ras_if; - - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; -} - static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -355,6 +319,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } /* If it's the first fault for this address, process it normally */ + if (retry_fault && !in_interrupt() && + amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + return 1; /* This also prevents sending it to KFD */ + if (!amdgpu_sriov_vf(adev)) { /* * Issue a dummy read to wait for the status register to @@ -417,7 +385,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = { .set = gmc_v9_0_ecc_interrupt_state, - .process = gmc_v9_0_process_ecc_irq, + .process = amdgpu_umc_process_ecc_irq, }; static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) @@ -584,44 +552,25 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, * 0 valid */ -static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, - uint32_t flags) +static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) { - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - - switch (flags & AMDGPU_VM_MTYPE_MASK) { + switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); case AMDGPU_VM_MTYPE_NC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); case AMDGPU_VM_MTYPE_WC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); + case AMDGPU_VM_MTYPE_RW: + return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW); case AMDGPU_VM_MTYPE_CC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); default: - pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); - break; + return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); } - - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; - - return pte_flag; } static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, @@ -648,12 +597,34 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } } +static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK; + + if (mapping->flags & AMDGPU_PTE_PRT) { + *flags |= AMDGPU_PTE_PRT; + *flags &= ~AMDGPU_PTE_VALID; + } + + if (adev->asic_type == CHIP_ARCTURUS && + !(*flags & AMDGPU_PTE_SYSTEM) && + mapping->bo_va->is_xgmi) + *flags |= AMDGPU_PTE_SNOOPED; +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, - .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, - .get_vm_pde = gmc_v9_0_get_vm_pde + .map_mtype = gmc_v9_0_map_mtype, + .get_vm_pde = gmc_v9_0_get_vm_pde, + .get_vm_pte = gmc_v9_0_get_vm_pte }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -664,6 +635,9 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { + case CHIP_VEGA10: + adev->umc.funcs = &umc_v6_0_funcs; + break; case CHIP_VEGA20: adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; @@ -681,7 +655,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA20: - adev->mmhub_funcs = &mmhub_v1_0_funcs; + adev->mmhub.funcs = &mmhub_v1_0_funcs; break; default: break; @@ -762,140 +736,10 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) return 0; } -static int gmc_v9_0_ecc_ras_block_late_init(void *handle, - struct ras_fs_if *fs_info, struct ras_common_if *ras_block) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if **ras_if = NULL; - struct ras_ih_if ih_info = { - .cb = gmc_v9_0_process_ras_data_cb, - }; - int r; - - if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) - ras_if = &adev->gmc.umc_ras_if; - else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB) - ras_if = &adev->gmc.mmhub_ras_if; - else - BUG(); - - if (!amdgpu_ras_is_supported(adev, ras_block->block)) { - amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); - return 0; - } - - /* handle resume path. */ - if (*ras_if) { - /* resend ras TA enable cmd during resume. - * prepare to handle failure. - */ - ih_info.head = **ras_if; - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - /* request a gpu reset. will run again. */ - amdgpu_ras_request_reset_on_boot(adev, - ras_block->block); - return 0; - } - /* fail to enable ras, cleanup all. */ - goto irq; - } - /* enable successfully. continue. */ - goto resume; - } - - *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); - if (!*ras_if) - return -ENOMEM; - - **ras_if = *ras_block; - - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - amdgpu_ras_request_reset_on_boot(adev, - ras_block->block); - r = 0; - } - goto feature; - } - - ih_info.head = **ras_if; - fs_info->head = **ras_if; - - if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { - r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); - if (r) - goto interrupt; - } - - amdgpu_ras_debugfs_create(adev, fs_info); - - r = amdgpu_ras_sysfs_create(adev, fs_info); - if (r) - goto sysfs; -resume: - if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { - r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); - if (r) - goto irq; - } - - return 0; -irq: - amdgpu_ras_sysfs_remove(adev, *ras_if); -sysfs: - amdgpu_ras_debugfs_remove(adev, *ras_if); - if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); -interrupt: - amdgpu_ras_feature_enable(adev, *ras_if, 0); -feature: - kfree(*ras_if); - *ras_if = NULL; - return r; -} - -static int gmc_v9_0_ecc_late_init(void *handle) -{ - int r; - - struct ras_fs_if umc_fs_info = { - .sysfs_name = "umc_err_count", - .debugfs_name = "umc_err_inject", - }; - struct ras_common_if umc_ras_block = { - .block = AMDGPU_RAS_BLOCK__UMC, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "umc", - }; - struct ras_fs_if mmhub_fs_info = { - .sysfs_name = "mmhub_err_count", - .debugfs_name = "mmhub_err_inject", - }; - struct ras_common_if mmhub_ras_block = { - .block = AMDGPU_RAS_BLOCK__MMHUB, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "mmhub", - }; - - r = gmc_v9_0_ecc_ras_block_late_init(handle, - &umc_fs_info, &umc_ras_block); - if (r) - return r; - - r = gmc_v9_0_ecc_ras_block_late_init(handle, - &mmhub_fs_info, &mmhub_ras_block); - return r; -} - static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool r; + int r; if (!gmc_v9_0_keep_stolen_memory(adev)) amdgpu_bo_late_init(adev); @@ -929,7 +773,7 @@ static int gmc_v9_0_late_init(void *handle) } } - r = gmc_v9_0_ecc_late_init(handle); + r = amdgpu_gmc_ras_late_init(adev); if (r) return r; @@ -970,33 +814,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) { - int chansize, numchan; int r; - if (amdgpu_sriov_vf(adev)) { - /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, - * and DF related registers is not readable, seems hardcord is the - * only way to set the correct vram_width - */ - adev->gmc.vram_width = 2048; - } else if (amdgpu_emu_mode != 1) { - adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); - } - - if (!adev->gmc.vram_width) { - /* hbm memory channel size */ - if (adev->flags & AMD_IS_APU) - chansize = 64; - else - chansize = 128; - - numchan = adev->df_funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; - } - /* size in MB on si */ adev->gmc.mc_vram_size = - adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { @@ -1108,7 +930,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) static int gmc_v9_0_sw_init(void *handle) { - int r; + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v1_0_init(adev); @@ -1119,7 +941,32 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (amdgpu_sriov_vf(adev)) + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + else if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = vram_width; + + if (!adev->gmc.vram_width) { + int chansize, numchan; + + /* hbm memory channel size */ + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; + + numchan = adev->df_funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; switch (adev->asic_type) { case CHIP_RAVEN: adev->num_vmhubs = 2; @@ -1240,33 +1087,7 @@ static int gmc_v9_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; void *stolen_vga_buf; - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->gmc.umc_ras_if) { - struct ras_common_if *ras_if = adev->gmc.umc_ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - }; - - /* remove fs first */ - amdgpu_ras_debugfs_remove(adev, ras_if); - amdgpu_ras_sysfs_remove(adev, ras_if); - /* remove the IH */ - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); - amdgpu_ras_feature_enable(adev, ras_if, 0); - kfree(ras_if); - } - - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && - adev->gmc.mmhub_ras_if) { - struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if; - - /* remove fs and disable ras feature */ - amdgpu_ras_debugfs_remove(adev, ras_if); - amdgpu_ras_sysfs_remove(adev, ras_if); - amdgpu_ras_feature_enable(adev, ras_if, 0); - kfree(ras_if); - } - + amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); @@ -1361,7 +1182,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); /* After HDP is initialized, flush HDP.*/ - adev->nbio_funcs->hdp_flush(adev, NULL); + adev->nbio.funcs->hdp_flush(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; @@ -1377,6 +1198,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) for (i = 0; i < adev->num_vmhubs; ++i) gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); + if (adev->umc.funcs && adev->umc.funcs->init_registers) + adev->umc.funcs->init_registers(adev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); @@ -1393,11 +1217,13 @@ static int gmc_v9_0_hw_init(void *handle) gmc_v9_0_init_golden_registers(adev); if (adev->mode_info.num_crtc) { - /* Lockout access through VGA aperture*/ - WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + if (adev->asic_type != CHIP_ARCTURUS) { + /* Lockout access through VGA aperture*/ + WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - /* disable VGA render */ - WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + /* disable VGA render */ + WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + } } r = gmc_v9_0_gart_enable(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 04cd4b6f95d4..6965e1e6fa9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -206,6 +206,8 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp); } @@ -616,5 +618,6 @@ static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, } const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { + .ras_late_init = amdgpu_mmhub_ras_late_init, .query_ras_error_count = mmhub_v1_0_query_ras_error_count, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 3542c203c3c8..2eea702de8ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -31,20 +31,25 @@ #include "soc15_common.h" -static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) +void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */ + int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - mmhub_v2_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -152,6 +157,8 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h index db16f3ece218..3ea4344f0315 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h @@ -31,5 +31,7 @@ void mmhub_v2_0_init(struct amdgpu_device *adev); int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 0cf7ef44b4b5..657970f9ebfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -240,6 +240,8 @@ static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, hubid * MMHUB_INSTANCE_REGISTER_OFFSET); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c new file mode 100644 index 000000000000..0d8767eb7a70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -0,0 +1,380 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "nbio/nbio_2_3_offset.h" +#include "nbio/nbio_2_3_sh_mask.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "soc15.h" +#include "navi10_ih.h" +#include "soc15_common.h" +#include "mxgpu_nv.h" +#include "mxgpu_ai.h" + +static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev) +{ + WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2); +} + +static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val) +{ + WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0); +} + +/* + * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine + * RCV_MSG_VALID filed of BIF_BX_PF_MAILBOX_CONTROL must already be set to 1 + * by host. + * + * if called no in IRQ routine, this peek_msg cannot guaranteed to return the + * correct value since it doesn't return the RCV_DW0 under the case that + * RCV_MSG_VALID is set by host. + */ +static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev) +{ + return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0)); +} + + +static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, + enum idh_event event) +{ + u32 reg; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0)); + if (reg != event) + return -ENOENT; + + xgpu_nv_mailbox_send_ack(adev); + + return 0; +} + +static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev) +{ + return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2; +} + +static int xgpu_nv_poll_ack(struct amdgpu_device *adev) +{ + int timeout = NV_MAILBOX_POLL_ACK_TIMEDOUT; + u8 reg; + + do { + reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE); + if (reg & 2) + return 0; + + mdelay(5); + timeout -= 5; + } while (timeout > 1); + + pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT); + + return -ETIME; +} + +static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) +{ + int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT; + + do { + r = xgpu_nv_mailbox_rcv_msg(adev, event); + if (!r) + return 0; + + msleep(10); + timeout -= 10; + } while (timeout > 1); + + pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); + + return -ETIME; +} + +static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) +{ + u32 reg; + int r; + uint8_t trn; + + /* IMPORTANT: + * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK + * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK + * which lead to VF's TRN_MSG_ACK cleared, otherwise below xgpu_nv_poll_ack() + * will return immediatly + */ + do { + xgpu_nv_mailbox_set_valid(adev, false); + trn = xgpu_nv_peek_ack(adev); + if (trn) { + pr_err("trn=%x ACK should not assert! wait again !\n", trn); + msleep(1); + } + } while (trn); + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0)); + reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0, + MSGBUF_DATA, req); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0), + reg); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1), + data1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2), + data2); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3), + data3); + + xgpu_nv_mailbox_set_valid(adev, true); + + /* start to poll ack */ + r = xgpu_nv_poll_ack(adev); + if (r) + pr_err("Doesn't get ack from pf, continue\n"); + + xgpu_nv_mailbox_set_valid(adev, false); +} + +static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + int r; + + xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); + + /* start to check msg if request is idh_req_gpu_init_access */ + if (req == IDH_REQ_GPU_INIT_ACCESS || + req == IDH_REQ_GPU_FINI_ACCESS || + req == IDH_REQ_GPU_RESET_ACCESS) { + r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); + if (r) { + pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); + return r; + } + /* Retrieve checksum from mailbox2 */ + if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) { + adev->virt.fw_reserve.checksum_key = + RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2)); + } + } + + return 0; +} + +static int xgpu_nv_request_reset(struct amdgpu_device *adev) +{ + return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +} + +static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + + req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS; + return xgpu_nv_send_access_requests(adev, req); +} + +static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + int r = 0; + + req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS; + r = xgpu_nv_send_access_requests(adev, req); + + return r; +} + +static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("get ack intr and do nothing.\n"); + return 0; +} + +static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static void xgpu_nv_mailbox_flr_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; + int locked; + + /* block amdgpu_gpu_recover till msg FLR COMPLETE received, + * otherwise the mailbox msg will be ruined/reseted by + * the VF FLR. + * + * we can unlock the lock_reset to allow "amdgpu_job_timedout" + * to run gpu_recover() after FLR_NOTIFICATION_CMPL received + * which means host side had finished this VF's FLR. + */ + locked = mutex_trylock(&adev->lock_reset); + if (locked) + adev->in_gpu_reset = 1; + + do { + if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) + goto flr_done; + + msleep(10); + timeout -= 10; + } while (timeout > 1); + +flr_done: + if (locked) { + adev->in_gpu_reset = 0; + mutex_unlock(&adev->lock_reset); + } + + /* Trigger recovery for world switch failure if no TDR */ + if (amdgpu_device_should_recover_gpu(adev)) + amdgpu_device_gpu_recover(adev, NULL); +} + +static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + enum idh_event event = xgpu_nv_mailbox_peek_msg(adev); + + switch (event) { + case IDH_FLR_NOTIFICATION: + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.flr_work); + break; + /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore + * it byfar since that polling thread will handle it, + * other msg like flr complete is not handled here. + */ + case IDH_CLR_MSG_BUF: + case IDH_FLR_NOTIFICATION_CMPL: + case IDH_READY_TO_ACCESS_GPU: + default: + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = { + .set = xgpu_nv_set_mailbox_ack_irq, + .process = xgpu_nv_mailbox_ack_irq, +}; + +static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = { + .set = xgpu_nv_set_mailbox_rcv_irq, + .process = xgpu_nv_mailbox_rcv_irq, +}; + +void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->virt.ack_irq.num_types = 1; + adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs; + adev->virt.rcv_irq.num_types = 1; + adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs; +} + +int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); + if (r) + return r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + return 0; +} + +int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); + + return 0; +} + +void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev) +{ + amdgpu_irq_put(adev, &adev->virt.ack_irq, 0); + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); +} + +const struct amdgpu_virt_ops xgpu_nv_virt_ops = { + .req_full_gpu = xgpu_nv_request_full_gpu_access, + .rel_full_gpu = xgpu_nv_release_full_gpu_access, + .reset_gpu = xgpu_nv_request_reset, + .wait_reset = NULL, + .trans_msg = xgpu_nv_mailbox_trans_msg, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h new file mode 100644 index 000000000000..99b15f6865cb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -0,0 +1,41 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MXGPU_NV_H__ +#define __MXGPU_NV_H__ + +#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 +#define NV_MAILBOX_POLL_MSG_TIMEDOUT 12000 +#define NV_MAILBOX_POLL_FLR_TIMEDOUT 500 + +extern const struct amdgpu_virt_ops xgpu_nv_virt_ops; + +void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev); +int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev); +int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev); +void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev); + +#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4) +#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4 + 1) + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 9fe08408db58..9af73567e716 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -117,7 +117,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) /* disable irqs */ navi10_ih_disable_interrupts(adev); - adev->nbio_funcs->ih_control(adev); + adev->nbio.funcs->ih_control(adev); /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); @@ -162,7 +162,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) } WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); - adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell, + adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell, ih->doorbell_index); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c index a56c93620e78..88efaecf9f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c @@ -24,7 +24,6 @@ #include "nv.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "navi10_ip_offset.h" int navi10_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c index cadc7603ca41..a786d159e5e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c @@ -24,7 +24,6 @@ #include "nv.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "navi12_ip_offset.h" int navi12_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c index 3b5f0f65e096..4ea1e8fbb601 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c @@ -24,7 +24,6 @@ #include "nv.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "navi14_ip_offset.h" int navi14_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index c05d78d4efc6..f3a3fe746222 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -27,11 +27,21 @@ #include "nbio/nbio_2_3_default.h" #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> #define smnPCIE_CONFIG_CNTL 0x11180044 #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 + +static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -56,10 +66,9 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev) @@ -311,7 +320,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { - .hdp_flush_reg = &nbio_v2_3_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset, @@ -331,4 +339,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .ih_control = nbio_v2_3_ih_control, .init_registers = nbio_v2_3_init_registers, .detect_hw_virt = nbio_v2_3_detect_hw_virt, + .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 5ae52085f6b7..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 6590143c3f75..635d9e1fc0a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -226,7 +226,7 @@ static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { +const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -277,7 +277,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { - .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index 0743a6f016f3..6dc743b73218 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 74eecb768a82..d6cbf26074bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -292,7 +292,6 @@ static void nbio_v7_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { - .hdp_flush_reg = &nbio_v7_0_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h index 508d549c5029..e7aefb252550 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 910fffced43b..238c2483496a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -23,10 +23,12 @@ #include "amdgpu.h" #include "amdgpu_atombios.h" #include "nbio_v7_4.h" +#include "amdgpu_ras.h" #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c @@ -266,7 +268,7 @@ static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); } -static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { +const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -306,17 +308,201 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { - uint32_t def, data; - def = data = RREG32_PCIE(smnPCIE_CI_CNTL); - data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1); +} - if (def != data) - WREG32_PCIE(smnPCIE_CI_CNTL, data); +static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_CNTLR_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + amdgpu_ras_global_ras_isr(adev); + } +} + +static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + amdgpu_ras_global_ras_isr(adev); + } +} + + +static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_intr_cntl; + + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { + /* set interrupt vector select bit to 0 to select + * vetcor 1 for bare metal case */ + bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, + BIF_INTR_CNTL, + RAS_INTR_VEC_SEL, 0); + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } + + return 0; +} + +static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for ras_controller_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. + */ + return 0; +} + +static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_intr_cntl; + + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { + /* set interrupt vector select bit to 0 to select + * vetcor 1 for bare metal case */ + bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, + BIF_INTR_CNTL, + RAS_INTR_VEC_SEL, 0); + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } + + return 0; +} + +static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for err_event_athub_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. + */ + return 0; +} + +static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = { + .set = nbio_v7_4_set_ras_controller_irq_state, + .process = nbio_v7_4_process_ras_controller_irq, +}; + +static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = { + .set = nbio_v7_4_set_ras_err_event_athub_irq_state, + .process = nbio_v7_4_process_err_event_athub_irq, +}; + +static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev) +{ + int r; + + /* init the irq funcs */ + adev->nbio.ras_controller_irq.funcs = + &nbio_v7_4_ras_controller_irq_funcs; + adev->nbio.ras_controller_irq.num_types = 1; + + /* register ras controller interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT, + &adev->nbio.ras_controller_irq); + if (r) + return r; + + return 0; +} + +static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev) +{ + + int r; + + /* init the irq funcs */ + adev->nbio.ras_err_event_athub_irq.funcs = + &nbio_v7_4_ras_err_event_athub_irq_funcs; + adev->nbio.ras_err_event_athub_irq.num_types = 1; + + /* register ras err event athub interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, + &adev->nbio.ras_err_event_athub_irq); + if (r) + return r; + + return 0; +} + +static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + uint32_t global_sts, central_sts, int_eoi; + uint32_t corr, fatal, non_fatal; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO); + corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr); + fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal); + non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, + ParityErrNonFatal); + + if (corr) + err_data->ce_count++; + if (fatal) + err_data->ue_count++; + + if (corr || fatal || non_fatal) { + central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS); + /* clear error status register */ + WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts); + + if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS, + BIFL_RasContller_Intr_Recv)) { + /* clear interrupt status register */ + WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts); + int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI); + int_eoi = REG_SET_FIELD(int_eoi, + IOHC_INTERRUPT_EOI, SMI_EOI, 1); + WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi); + } + } } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { - .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg, .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset, @@ -337,4 +523,10 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, + .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, + .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, + .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt, + .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt, + .query_ras_error_count = nbio_v7_4_query_ras_error_count, + .ras_late_init = amdgpu_nbio_ras_late_init, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index c442865bac4f..b1ac82872752 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -26,6 +26,7 @@ #include "soc15_common.h" +extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index de9b995b65b1..46206a1a1f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -46,6 +46,7 @@ #include "gmc_v10_0.h" #include "gfxhub_v2_0.h" #include "mmhub_v2_0.h" +#include "nbio_v2_3.h" #include "nv.h" #include "navi10_ih.h" #include "gfx_v10_0.h" @@ -53,6 +54,7 @@ #include "vcn_v2_0.h" #include "dce_virtual.h" #include "mes_v10_1.h" +#include "mxgpu_nv.h" static const struct amd_ip_funcs nv_common_ip_funcs; @@ -63,8 +65,8 @@ static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -78,8 +80,8 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -119,7 +121,7 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 nv_get_config_memsize(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_memsize(adev); + return adev->nbio.funcs->get_memsize(adev); } static u32 nv_get_xclk(struct amdgpu_device *adev) @@ -279,7 +281,7 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio_funcs->get_memsize(adev); + u32 memsize = adev->nbio.funcs->get_memsize(adev); if (memsize != 0xffffffff) break; @@ -368,8 +370,8 @@ static void nv_program_aspm(struct amdgpu_device *adev) static void nv_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - adev->nbio_funcs->enable_doorbell_aperture(adev, enable); - adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version nv_common_ip_block = @@ -423,9 +425,13 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (r) return r; - adev->nbio_funcs = &nbio_v2_3_funcs; + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - adev->nbio_funcs->detect_hw_virt(adev); + adev->nbio.funcs->detect_hw_virt(adev); + + if (amdgpu_sriov_vf(adev)) + adev->virt.ops = &xgpu_nv_virt_ops; switch (adev->asic_type) { case CHIP_NAVI10: @@ -435,7 +441,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev)) + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -446,7 +452,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev)) + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); if (adev->enable_mes) @@ -458,7 +464,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev)) + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -469,7 +475,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && - is_support_sw_smu(adev)) + is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); break; @@ -482,12 +488,12 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) static uint32_t nv_get_rev_id(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_rev_id(adev); + return adev->nbio.funcs->get_rev_id(adev); } static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - adev->nbio_funcs->hdp_flush(adev, ring); + adev->nbio.funcs->hdp_flush(adev, ring); } static void nv_invalidate_hdp(struct amdgpu_device *adev, @@ -583,8 +589,11 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = static int nv_common_early_init(void *handle) { +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &nv_pcie_rreg; @@ -667,16 +676,31 @@ static int nv_common_early_init(void *handle) return -EINVAL; } + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev); + xgpu_nv_mailbox_set_irq_funcs(adev); + } + return 0; } static int nv_common_late_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_get_irq(adev); + return 0; } static int nv_common_sw_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_add_irq_id(adev); + return 0; } @@ -694,7 +718,13 @@ static int nv_common_hw_init(void *handle) /* enable aspm */ nv_program_aspm(adev); /* setup nbio registers */ - adev->nbio_funcs->init_registers(adev); + adev->nbio.funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * for the purpose of expose those registers + * to process space + */ + if (adev->nbio.funcs->remap_hdp_registers) + adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ nv_enable_doorbell_aperture(adev, true); @@ -856,9 +886,9 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, + adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, + adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); nv_update_hdp_mem_power_gating(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -886,7 +916,7 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio_funcs->get_clockgating_state(adev, flags); + adev->nbio.funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_MGCG */ tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 5d95e614369a..b96484a72535 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -48,7 +48,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) char fw_name[30]; int err = 0; const struct psp_firmware_header_v1_0 *hdr; - + const struct ta_firmware_header_v1_0 *ta_hdr; DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -79,7 +79,45 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) adev->psp.asd_start_addr = (uint8_t *)hdr + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v10.0: Failed to load firmware \"%s\"\n", + fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *) + adev->psp.ta_fw->data; + adev->psp.ta_hdcp_ucode_version = + le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); + adev->psp.ta_hdcp_ucode_size = + le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); + adev->psp.ta_hdcp_start_addr = + (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + + adev->psp.ta_dtm_ucode_version = + le32_to_cpu(ta_hdr->ta_dtm_ucode_version); + adev->psp.ta_dtm_ucode_size = + le32_to_cpu(ta_hdr->ta_dtm_size_bytes); + adev->psp.ta_dtm_start_addr = + (uint8_t *)adev->psp.ta_hdcp_start_addr + + le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + } + return 0; + +out2: + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; out: if (err) { dev_err(adev->dev, @@ -228,6 +266,7 @@ static int psp_v10_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 10166104b8a3..04318cfd50a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -49,6 +49,7 @@ MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -155,6 +156,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) switch (adev->asic_type) { case CHIP_VEGA20: + case CHIP_ARCTURUS: snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); if (err) { @@ -182,7 +184,6 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: - case CHIP_ARCTURUS: break; default: BUG(); @@ -398,6 +399,34 @@ static bool psp_v11_0_support_vmr_ring(struct psp_context *psp) return false; } +static int psp_v11_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + /* Write the ring destroy command*/ + if (psp_v11_0_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) */ + if (psp_v11_0_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + return ret; +} + static int psp_v11_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -407,6 +436,12 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct amdgpu_device *adev = psp->adev; if (psp_v11_0_support_vmr_ring(psp)) { + ret = psp_v11_0_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n"); + return ret; + } + /* Write low address of the ring to C2PMSG_102 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); @@ -451,33 +486,6 @@ static int psp_v11_0_ring_create(struct psp_context *psp, return ret; } -static int psp_v11_0_ring_stop(struct psp_context *psp, - enum psp_ring_type ring_type) -{ - int ret = 0; - struct amdgpu_device *adev = psp->adev; - - /* Write the ring destroy command*/ - if (psp_v11_0_support_vmr_ring(psp)) - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); - else - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, - GFX_CTRL_CMD_ID_DESTROY_RINGS); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) */ - if (psp_v11_0_support_vmr_ring(psp)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); - else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); - - return ret; -} static int psp_v11_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) @@ -541,6 +549,7 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index c72e43f8e0be..8f553f6f92d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -378,6 +378,7 @@ static int psp_v12_0_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index d2c727f6a8bd..fdc00938327b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -454,6 +454,7 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; + amdgpu_asic_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 78452cf0115d..96581b5b0a8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -746,13 +746,13 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; sdma_v4_0_wait_reg_mem(ring, 0, 1, - adev->nbio_funcs->get_hdp_flush_done_offset(adev), - adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), + adev->nbio.funcs->get_hdp_flush_req_offset(adev), ref_and_mask, ref_and_mask, 10); } @@ -1690,102 +1690,17 @@ static int sdma_v4_0_early_init(void *handle) } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, - struct ras_err_data *err_data, + void *err_data, struct amdgpu_iv_entry *entry); static int sdma_v4_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if **ras_if = &adev->sdma.ras_if; struct ras_ih_if ih_info = { .cb = sdma_v4_0_process_ras_data_cb, }; - struct ras_fs_if fs_info = { - .sysfs_name = "sdma_err_count", - .debugfs_name = "sdma_err_inject", - }; - struct ras_common_if ras_block = { - .block = AMDGPU_RAS_BLOCK__SDMA, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "sdma", - }; - int r, i; - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { - amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); - return 0; - } - - /* handle resume path. */ - if (*ras_if) { - /* resend ras TA enable cmd during resume. - * prepare to handle failure. - */ - ih_info.head = **ras_if; - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - /* request a gpu reset. will run again. */ - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__SDMA); - return 0; - } - /* fail to enable ras, cleanup all. */ - goto irq; - } - /* enable successfully. continue. */ - goto resume; - } - - *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); - if (!*ras_if) - return -ENOMEM; - - **ras_if = ras_block; - - r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) { - if (r == -EAGAIN) { - amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__SDMA); - r = 0; - } - goto feature; - } - - ih_info.head = **ras_if; - fs_info.head = **ras_if; - - r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); - if (r) - goto interrupt; - - amdgpu_ras_debugfs_create(adev, &fs_info); - - r = amdgpu_ras_sysfs_create(adev, &fs_info); - if (r) - goto sysfs; -resume: - for (i = 0; i < adev->sdma.num_instances; i++) { - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, - AMDGPU_SDMA_IRQ_INSTANCE0 + i); - if (r) - goto irq; - } - - return 0; -irq: - amdgpu_ras_sysfs_remove(adev, *ras_if); -sysfs: - amdgpu_ras_debugfs_remove(adev, *ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); -interrupt: - amdgpu_ras_feature_enable(adev, *ras_if, 0); -feature: - kfree(*ras_if); - *ras_if = NULL; - return r; + return amdgpu_sdma_ras_late_init(adev, &ih_info); } static int sdma_v4_0_sw_init(void *handle) @@ -1857,21 +1772,7 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && - adev->sdma.ras_if) { - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - }; - - /*remove fs first*/ - amdgpu_ras_debugfs_remove(adev, ras_if); - amdgpu_ras_sysfs_remove(adev, ras_if); - /*remove the IH*/ - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); - amdgpu_ras_feature_enable(adev, ras_if, 0); - kfree(ras_if); - } + amdgpu_sdma_ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -2024,52 +1925,28 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, - struct ras_err_data *err_data, + void *err_data, struct amdgpu_iv_entry *entry) { - uint32_t err_source; int instance; + /* When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + goto out; + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); if (instance < 0) - return 0; - - switch (entry->src_id) { - case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: - err_source = 0; - break; - case SDMA0_4_0__SRCID__SDMA_ECC: - err_source = 1; - break; - default: - return 0; - } - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + goto out; - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_sdma_process_ras_data_cb(adev, err_data, entry); +out: return AMDGPU_RAS_SUCCESS; } -static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; - - if (!ras_if) - return 0; - - ih_data.head = *ras_if; - - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; -} - static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -2417,7 +2294,7 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = { static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = { .set = sdma_v4_0_set_ecc_irq_state, - .process = sdma_v4_0_process_ecc_irq, + .process = amdgpu_sdma_process_ecc_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index f6e81680dd7e..b8fdb192f6d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -406,7 +406,7 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; if (ring->me == 0) ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; @@ -416,8 +416,8 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); amdgpu_ring_write(ring, ref_and_mask); /* reference */ amdgpu_ring_write(ring, ref_and_mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | @@ -683,7 +683,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, 20); if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f8ab80c8801b..fc6cfbced170 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -58,6 +58,9 @@ #include "mmhub_v1_0.h" #include "df_v1_7.h" #include "df_v3_6.h" +#include "nbio_v6_1.h" +#include "nbio_v7_0.h" +#include "nbio_v7_4.h" #include "vega10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" @@ -91,8 +94,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -106,8 +109,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -121,8 +124,8 @@ static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u64 r; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); /* read low 32 bit */ @@ -142,8 +145,8 @@ static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) { unsigned long flags, address, data; - address = adev->nbio_funcs->get_pcie_index_offset(adev); - data = adev->nbio_funcs->get_pcie_data_offset(adev); + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); /* write low 32 bit */ @@ -262,7 +265,7 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_memsize(adev); + return adev->nbio.funcs->get_memsize(adev); } static u32 soc15_get_xclk(struct amdgpu_device *adev) @@ -461,7 +464,7 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio_funcs->get_memsize(adev); + u32 memsize = adev->nbio.funcs->get_memsize(adev); if (memsize != 0xffffffff) break; @@ -511,6 +514,8 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) static int soc15_mode2_reset(struct amdgpu_device *adev) { + if (is_support_sw_smu(adev)) + return smu_mode2_reset(&adev->smu); if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->asic_reset_mode_2) return -ENOENT; @@ -525,6 +530,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: return AMD_RESET_METHOD_MODE2; case CHIP_VEGA10: case CHIP_VEGA12: @@ -626,8 +632,8 @@ static void soc15_program_aspm(struct amdgpu_device *adev) static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - adev->nbio_funcs->enable_doorbell_aperture(adev, enable); - adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -641,7 +647,7 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block = static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) { - return adev->nbio_funcs->get_rev_id(adev); + return adev->nbio.funcs->get_rev_id(adev); } int soc15_set_ip_blocks(struct amdgpu_device *adev) @@ -667,13 +673,17 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; - if (adev->flags & AMD_IS_APU) - adev->nbio_funcs = &nbio_v7_0_funcs; - else if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS) - adev->nbio_funcs = &nbio_v7_4_funcs; - else - adev->nbio_funcs = &nbio_v6_1_funcs; + if (adev->flags & AMD_IS_APU) { + adev->nbio.funcs = &nbio_v7_0_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg; + } else if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) { + adev->nbio.funcs = &nbio_v7_4_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; + } else { + adev->nbio.funcs = &nbio_v6_1_funcs; + adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg; + } if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->df_funcs = &df_v3_6_funcs; @@ -681,7 +691,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->df_funcs = &df_v1_7_funcs; adev->rev_id = soc15_get_rev_id(adev); - adev->nbio_funcs->detect_hw_virt(adev); + adev->nbio.funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_ai_virt_ops; @@ -750,13 +760,26 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_ARCTURUS: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + } + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + + if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); break; case CHIP_RENOIR: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); @@ -785,7 +808,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - adev->nbio_funcs->hdp_flush(adev, ring); + adev->nbio.funcs->hdp_flush(adev, ring); } static void soc15_invalidate_hdp(struct amdgpu_device *adev, @@ -1157,7 +1180,8 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_MC_MGCG | - AMD_CG_SUPPORT_MC_LS; + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_IH_CG; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x32; break; @@ -1208,11 +1232,15 @@ static int soc15_common_early_init(void *handle) static int soc15_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r = 0; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - return 0; + if (adev->nbio.funcs->ras_late_init) + r = adev->nbio.funcs->ras_late_init(adev); + + return r; } static int soc15_common_sw_init(void *handle) @@ -1229,6 +1257,10 @@ static int soc15_common_sw_init(void *handle) static int soc15_common_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_nbio_ras_fini(adev); + adev->df_funcs->sw_fini(adev); return 0; } @@ -1241,12 +1273,12 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - adev->nbio_funcs->sdma_doorbell_range(adev, i, + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, adev->doorbell_index.sdma_doorbell_range); } - adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); } } @@ -1260,13 +1292,13 @@ static int soc15_common_hw_init(void *handle) /* enable aspm */ soc15_program_aspm(adev); /* setup nbio registers */ - adev->nbio_funcs->init_registers(adev); + adev->nbio.funcs->init_registers(adev); /* remap HDP registers to a hole in mmio space, * for the purpose of expose those registers * to process space */ - if (adev->nbio_funcs->remap_hdp_registers) - adev->nbio_funcs->remap_hdp_registers(adev); + if (adev->nbio.funcs->remap_hdp_registers) + adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); @@ -1289,6 +1321,14 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); + if (adev->nbio.ras_if && + amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + if (adev->nbio.funcs->init_ras_controller_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); + if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) + amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } + return 0; } @@ -1429,9 +1469,9 @@ static int soc15_common_set_clockgating_state(void *handle, case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, + adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, + adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -1446,9 +1486,9 @@ static int soc15_common_set_clockgating_state(void *handle, break; case CHIP_RAVEN: case CHIP_RENOIR: - adev->nbio_funcs->update_medium_grain_clock_gating(adev, + adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - adev->nbio_funcs->update_medium_grain_light_sleep(adev, + adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -1477,7 +1517,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio_funcs->get_clockgating_state(adev, flags); + adev->nbio.funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c new file mode 100644 index 000000000000..0d6b50528d76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v6_0.h" +#include "amdgpu.h" + +static void umc_v6_0_init_registers(struct amdgpu_device *adev) +{ + unsigned i,j; + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + WREG32((i*0x100000 + 0x5010c + j*0x2000)/4, 0x1002); +} + +const struct amdgpu_umc_funcs umc_v6_0_funcs = { + .init_registers = umc_v6_0_init_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h new file mode 100644 index 000000000000..109f1a57a46e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V6_0_H__ +#define __UMC_V6_0_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +extern const struct amdgpu_umc_funcs umc_v6_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 8502e736f721..47c4b96b14d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -75,6 +75,17 @@ static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) RSMU_UMC_INDEX_MODE_EN, 0); } +static uint32_t umc_v6_1_get_umc_inst(struct amdgpu_device *adev) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + return REG_GET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_INSTANCE); +} + static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) @@ -165,7 +176,8 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, uint32_t umc_reg_offset, uint32_t channel_index) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr; + uint64_t mc_umc_status, err_addr, retired_page; + struct eeprom_table_record *err_rec; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); @@ -177,6 +189,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, return; } + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); /* calculate error address if ue/ce error is detected */ @@ -191,12 +204,24 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, err_addr &= ~((0x1ULL << lsb) - 1); /* translate umc channel address to soc pa, 3 parts are included */ - err_data->err_addr[err_data->err_addr_cnt] = - ADDR_OF_8KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - err_data->err_addr_cnt++; + retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) + == 1) { + err_rec->address = err_addr; + /* page frame address is saved */ + err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + err_rec->ts = (uint64_t)ktime_get_real_seconds(); + err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = channel_index; + err_rec->mcumc_id = umc_v6_1_get_umc_inst(adev); + + err_data->err_addr_cnt++; + } } /* clear umc status */ @@ -209,7 +234,7 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); } -static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, +static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, struct ras_err_data *err_data, uint32_t umc_reg_offset, uint32_t channel_index) { @@ -239,15 +264,16 @@ static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); } -static void umc_v6_1_ras_init(struct amdgpu_device *adev) +static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) { void *ras_error_status = NULL; - amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel); + amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { - .ras_init = umc_v6_1_ras_init, + .err_cnt_init = umc_v6_1_err_cnt_init, + .ras_late_init = amdgpu_umc_ras_late_init, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 93b3500e522b..b4f84a820a44 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -202,7 +202,6 @@ static int vcn_v1_0_hw_init(void *handle) for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; - ring->sched.ready = true; r = amdgpu_ring_test_helper(ring); if (r) goto done; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 36ad0c0e8efb..38f787a560cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -244,33 +244,24 @@ static int vcn_v2_0_hw_init(void *handle) struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; - adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ring->doorbell_index, 0); - ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; - ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } ring = &adev->vcn.inst->ring_jpeg; - ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } done: if (!r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 395c2259f979..0d5c95d73b63 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -255,32 +255,27 @@ static int vcn_v2_5_hw_init(void *handle) continue; ring = &adev->vcn.inst[j].ring_dec; - adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ring->doorbell_index, j); - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst[j].ring_enc[i]; + /* disable encode rings till the robustness of the FW */ ring->sched.ready = false; continue; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } ring = &adev->vcn.inst[j].ring_jpeg; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } done: if (!r) @@ -423,7 +418,6 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) * vcn_v2_5_disable_clock_gating - disable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Disable clock gating for VCN block */ @@ -542,7 +536,6 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) * vcn_v2_5_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Enable clock gating for VCN block */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 9eae3536ddad..5cb7e231de5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -226,7 +226,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) /* disable irqs */ vega10_ih_disable_interrupts(adev); - adev->nbio_funcs->ih_control(adev); + adev->nbio.funcs->ih_control(adev); ih = &adev->irq.ih; /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ @@ -675,10 +675,49 @@ static int vega10_ih_soft_reset(void *handle) return 0; } +static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); + field_val = enable ? 0 : 1; + /** + * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE + * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field. + */ + if (adev->asic_type > CHIP_VEGA10) { + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val); + } + + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); + } +} + static int vega10_ih_set_clockgating_state(void *handle, enum amd_clockgating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega10_ih_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE ? true : false); return 0; + } static int vega10_ih_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index bd0580334f83..6b52a539d51b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -24,7 +24,6 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "vega10_ip_offset.h" int vega10_reg_base_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 587e33f5dcce..556f854e3551 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -24,7 +24,6 @@ #include "soc15.h" #include "soc15_common.h" -#include "soc15_hw_ip.h" #include "vega20_ip_offset.h" int vega20_reg_base_init(struct amdgpu_device *adev) |