Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
240 files changed, 21408 insertions, 4257 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 22d88f8ef527..0051fb1b437f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -17,6 +17,7 @@ config DRM_AMDGPU
 	select HWMON
 	select I2C
 	select I2C_ALGOBIT
+	select CRC16
 	select BACKLIGHT_CLASS_DEVICE
 	select INTERVAL_TREE
 	select DRM_BUDDY
@@ -70,6 +71,17 @@ config DRM_AMDGPU_USERPTR
 	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
 	  isn't already selected to enabled full userptr support.
 
+config DRM_AMD_ISP
+	bool "Enable AMD Image Signal Processor IP support"
+	depends on DRM_AMDGPU
+	select MFD_CORE
+	select PM_GENERIC_DOMAINS if PM
+	help
+	  Choose this option to enable ISP IP support for AMD SOCs.
+	  This adds the ISP (Image Signal Processor) IP driver and wires
+	  it up into the amdgpu driver.  It is required for cameras
+	  on APUs which utilize mipi cameras.
+
 config DRM_AMDGPU_WERROR
 	bool "Force the compiler to throw an error instead of a warning when compiling"
 	depends on DRM_AMDGPU
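For context, a minimal sketch (not part of this commit) of how a bool Kconfig symbol such as DRM_AMD_ISP is typically consumed from C code; the dev_dbg message is illustrative only:

	/* IS_ENABLED() evaluates to 1 when CONFIG_DRM_AMD_ISP is set,
	 * so ISP-specific paths compile away on kernels without it. */
	if (IS_ENABLED(CONFIG_DRM_AMD_ISP))
		dev_dbg(adev->dev, "ISP IP block support is compiled in\n");

The amdgpu.h hunk later in this diff uses the preprocessor form of the same guard, #if defined(CONFIG_DRM_AMD_ISP), to keep the amdgpu_isp header include and struct member out of non-ISP builds.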
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..38408e4e158e 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -23,7 +23,7 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-FULL_AMD_PATH=$(srctree)/$(src)/..
+FULL_AMD_PATH=$(src)/..
 DISPLAY_FOLDER_NAME=display
 FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
 
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
 	atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+	amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
 	amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
@@ -80,7 +81,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
 	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
@@ -96,7 +97,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
 amdgpu-y += \
 	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
 	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
-	nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
+	nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
 	sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
 	nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o
 
@@ -105,7 +106,8 @@ amdgpu-y += \
 	df_v1_7.o \
 	df_v3_6.o \
 	df_v4_3.o \
-	df_v4_6_2.o
+	df_v4_6_2.o \
+	df_v4_15.o
 
 # add GMC block
 amdgpu-y += \
@@ -115,7 +117,7 @@ amdgpu-y += \
 	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
 	mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
 	mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
-	gfxhub_v11_5_0.o
+	gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o
 
 # add UMC block
 amdgpu-y += \
@@ -166,7 +168,9 @@ amdgpu-y += \
 	imu_v11_0.o \
 	gfx_v11_0.o \
 	gfx_v11_0_3.o \
-	imu_v11_0_3.o
+	imu_v11_0_3.o \
+	gfx_v12_0.o \
+	imu_v12_0.o
 
 # add async DMA block
 amdgpu-y += \
@@ -178,13 +182,14 @@ amdgpu-y += \
 	sdma_v4_4_2.o \
 	sdma_v5_0.o \
 	sdma_v5_2.o \
-	sdma_v6_0.o
+	sdma_v6_0.o \
+	sdma_v7_0.o
 
 # add MES block
 amdgpu-y += \
 	amdgpu_mes.o \
-	mes_v10_1.o \
-	mes_v11_0.o
+	mes_v11_0.o \
+	mes_v12_0.o
 
 # add UVD block
 amdgpu-y += \
@@ -247,7 +252,8 @@ amdgpu-y += \
 	smuio_v11_0_6.o \
 	smuio_v13_0.o \
 	smuio_v13_0_3.o \
-	smuio_v13_0_6.o
+	smuio_v13_0_6.o \
+	smuio_v14_0_2.o
 
 # add reset block
 amdgpu-y += \
@@ -275,7 +281,8 @@ amdgpu-y += \
 	amdgpu_amdkfd_gc_9_4_3.o \
 	amdgpu_amdkfd_gfx_v10.o \
 	amdgpu_amdkfd_gfx_v10_3.o \
-	amdgpu_amdkfd_gfx_v11.o
+	amdgpu_amdkfd_gfx_v11.o \
+	amdgpu_amdkfd_gfx_v12.o
 
 ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
 amdgpu-y += amdgpu_amdkfd_gfx_v7.o
@@ -317,4 +324,12 @@ amdgpu-y += $(AMD_DISPLAY_FILES)
 
 endif
 
+# add isp block
+ifneq ($(CONFIG_DRM_AMD_ISP),)
+amdgpu-y += \
+	amdgpu_isp.o \
+	isp_v4_1_0.o \
+	isp_v4_1_1.o
+endif
+
 obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 576067d66bb9..b0f95a7649bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -97,7 +97,7 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = false;
 	}
 
-	return r;
+	return 0;
 }
 
 static int
@@ -316,8 +316,6 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.late_initialized = true;
 	}
 
-	amdgpu_ras_set_error_query_ready(adev, true);
-
 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..137a88b8de45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -112,6 +112,9 @@
 #include "amdgpu_xcp.h"
 #include "amdgpu_seq64.h"
 #include "amdgpu_reg_state.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
 
 #define MAX_GPU_INSTANCE		64
 
@@ -139,6 +142,14 @@ enum amdgpu_ss {
 	AMDGPU_SS_DRV_UNLOAD
 };
 
+struct amdgpu_hwip_reg_entry {
+	u32		hwip;
+	u32		inst;
+	u32		seg;
+	u32		reg_offset;
+	const char	*reg_name;
+};
+
 struct amdgpu_watchdog_timer {
 	bool timeout_fatal_disable;
 	uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
@@ -210,7 +221,9 @@ extern int amdgpu_async_gfx_ring;
 extern int amdgpu_mcbp;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
 extern int amdgpu_mes_kiq;
+extern int amdgpu_uni_mes;
 extern int amdgpu_noretry;
 extern int amdgpu_force_asic_type;
 extern int amdgpu_smartshift_bias;
@@ -244,10 +257,12 @@ extern int amdgpu_cik_support;
 extern int amdgpu_num_kcq;
 
 #define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
+#define AMDGPU_UMSCHFW_LOG_SIZE (32 * 1024)
 extern int amdgpu_vcnfw_log;
 extern int amdgpu_sg_display;
 extern int amdgpu_umsch_mm;
 extern int amdgpu_seamless;
+extern int amdgpu_umsch_mm_fwlog;
 
 extern int amdgpu_user_partt_mode;
 extern int amdgpu_agp;
@@ -332,9 +347,9 @@ enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
-#define SRIOV_USEC_TIMEOUT	1200000 /* wait 12 * 100ms for SRIOV */
-#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
+#define SRIOV_USEC_TIMEOUT	1200000 /* wait 12 * 100ms for SRIOV */
+#define MAX_KIQ_REG_WAIT	(amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
 
 int amdgpu_device_ip_set_clockgating_state(void *dev,
@@ -493,6 +508,7 @@ struct amdgpu_wb {
 	uint64_t		gpu_addr;
 	u32			num_wb;	/* Number of wb slots actually reserved for amdgpu. */
 	unsigned long		used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+	spinlock_t		lock;
 };
 
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
@@ -605,7 +621,7 @@ struct amdgpu_asic_funcs {
 	/* PCIe replay counter */
 	uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
 	/* device supports BACO */
-	bool (*supports_baco)(struct amdgpu_device *adev);
+	int (*supports_baco)(struct amdgpu_device *adev);
 	/* pre asic_init quirks */
 	void (*pre_asic_init)(struct amdgpu_device *adev);
 	/* enter/exit umd stable pstate */
@@ -707,6 +723,7 @@ enum amd_hw_ip_block_type {
 	XGMI_HWIP,
 	DCI_HWIP,
 	PCIE_HWIP,
+	ISP_HWIP,
 	MAX_HWIP
 };
 
@@ -1033,9 +1050,15 @@ struct amdgpu_device {
 	/* display related functionality */
 	struct amdgpu_display_manager dm;
 
+#if defined(CONFIG_DRM_AMD_ISP)
+	/* isp */
+	struct amdgpu_isp		isp;
+#endif
+
 	/* mes */
 	bool                            enable_mes;
 	bool                            enable_mes_kiq;
+	bool                            enable_uni_mes;
 	struct amdgpu_mes               mes;
 	struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
 
@@ -1151,6 +1174,7 @@ struct amdgpu_device {
 	bool                            debug_largebar;
 	bool                            debug_disable_soft_recovery;
 	bool                            debug_use_vram_fw_buf;
+	bool                            debug_enable_ras_aca;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
@@ -1407,7 +1431,8 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev);
 bool amdgpu_device_supports_px(struct drm_device *dev);
 bool amdgpu_device_supports_boco(struct drm_device *dev);
 bool amdgpu_device_supports_smart_shift(struct drm_device *dev);
-bool amdgpu_device_supports_baco(struct drm_device *dev);
+int amdgpu_device_supports_baco(struct drm_device *dev);
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev);
 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
 				      struct amdgpu_device *peer_adev);
 int amdgpu_device_baco_enter(struct drm_device *dev);
@@ -1423,6 +1448,7 @@ u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
 				u32 reg);
 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
 				u32 reg, u32 v);
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 					    struct dma_fence *gang);
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
@@ -1549,6 +1575,7 @@ static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
 						  u8 dev_state, bool drv_state) { return 0; }
 static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
 						 enum amdgpu_ss ss_state) { return 0; }
+static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { }
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
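Note that MAX_KIQ_REG_WAIT above now expands to an expression that calls amdgpu_sriov_vf(adev), so the macro can only be used where an adev variable is in scope, and it selects a 50 ms timeout on SR-IOV VFs versus 5 ms on bare metal. A minimal polling sketch (an assumption, not code from this commit) of how a caller might combine it with MAX_KIQ_REG_TRY:

	/* Sketch only: wait for a KIQ fence sequence number, retrying up to
	 * MAX_KIQ_REG_TRY times; each poll times out after MAX_KIQ_REG_WAIT
	 * microseconds, which the macro extends automatically for VFs. */
	static int example_kiq_wait(struct amdgpu_device *adev,
				    struct amdgpu_ring *kiq_ring, uint32_t seq)
	{
		uint32_t try = 0;
		long r;

		do {
			r = amdgpu_fence_wait_polling(kiq_ring, seq, MAX_KIQ_REG_WAIT);
		} while (r < 1 && ++try < MAX_KIQ_REG_TRY);

		return r < 1 ? -ETIME : 0;
	}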
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 493982f94649..19158cc30f31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -28,7 +28,7 @@
 
 #define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
 
-typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data);
+typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
 
 struct aca_banks {
 	int nr_banks;
@@ -86,7 +86,7 @@ static void aca_banks_release(struct aca_banks *banks)
 	}
 }
 
-static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count)
+static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count)
 {
 	struct amdgpu_aca *aca = &adev->aca;
 	const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
@@ -116,20 +116,22 @@ static struct aca_regs_dump {
 	{"CONTROL_MASK",	ACA_REG_IDX_CTL_MASK},
 };
 
-static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank)
+static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
+			      struct ras_query_context *qctx)
 {
+	u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
 	int i;
 
-	dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
 	/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
 	for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
-		dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
-			 idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
+		RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
+			      idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
 }
 
-static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type,
+static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
 				       int start, int count,
-				       struct aca_banks *banks)
+				       struct aca_banks *banks, struct ras_query_context *qctx)
 {
 	struct amdgpu_aca *aca = &adev->aca;
 	const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
@@ -143,13 +145,12 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro
 		return -EOPNOTSUPP;
 
 	switch (type) {
-	case ACA_ERROR_TYPE_UE:
+	case ACA_SMU_TYPE_UE:
 		max_count = smu_funcs->max_ue_bank_count;
 		break;
-	case ACA_ERROR_TYPE_CE:
+	case ACA_SMU_TYPE_CE:
 		max_count = smu_funcs->max_ce_bank_count;
 		break;
-	case ACA_ERROR_TYPE_DEFERRED:
 	default:
 		return -EINVAL;
 	}
@@ -164,7 +165,9 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro
 		if (ret)
 			return ret;
 
-		aca_smu_bank_dump(adev, i, count, &bank);
+		bank.type = type;
+
+		aca_smu_bank_dump(adev, i, count, &bank, qctx);
 
 		ret = aca_banks_add_bank(banks, &bank);
 		if (ret)
@@ -195,7 +198,7 @@ static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type t
 	return hwip->hwid == hwid && hwip->mcatype == mcatype;
 }
 
-static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type)
+static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
 {
 	const struct aca_bank_ops *bank_ops = handle->bank_ops;
 
@@ -273,59 +276,49 @@ static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_
 	return new_bank_error(aerr, info);
 }
 
-static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type,
-			  struct aca_bank_report *report)
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+				   enum aca_error_type type, u64 count)
 {
 	struct aca_error_cache *error_cache = &handle->error_cache;
 	struct aca_bank_error *bank_error;
 	struct aca_error *aerr;
 
-	if (!handle || !report)
+	if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT)
 		return -EINVAL;
 
-	if (!report->count[type])
+	if (!count)
 		return 0;
 
 	aerr = &error_cache->errors[type];
-	bank_error = get_bank_error(aerr, &report->info);
+	bank_error = get_bank_error(aerr, info);
 	if (!bank_error)
 		return -ENOMEM;
 
-	bank_error->count[type] += report->count[type];
+	bank_error->count += count;
 
 	return 0;
 }
 
-static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank,
-				    enum aca_error_type type, struct aca_bank_report *report)
+static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
 {
 	const struct aca_bank_ops *bank_ops = handle->bank_ops;
 
-	if (!bank || !report)
+	if (!bank)
 		return -EINVAL;
 
-	if (!bank_ops->aca_bank_generate_report)
+	if (!bank_ops->aca_bank_parser)
 		return -EOPNOTSUPP;
 
-	memset(report, 0, sizeof(*report));
-	return bank_ops->aca_bank_generate_report(handle, bank, type,
-						  report, handle->data);
+	return bank_ops->aca_bank_parser(handle, bank, type,
+					 handle->data);
 }
 
 static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
-				      enum aca_error_type type, void *data)
+				      enum aca_smu_type type, void *data)
 {
-	struct aca_bank_report report;
 	int ret;
 
-	ret = aca_generate_bank_report(handle, bank, type, &report);
-	if (ret)
-		return ret;
-
-	if (!report.count[type])
-		return 0;
-
-	ret = aca_log_errors(handle, type, &report);
+	ret = aca_bank_parser(handle, bank, type);
 	if (ret)
 		return ret;
 
@@ -333,7 +326,7 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank
 }
 
 static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
-			     enum aca_error_type type, bank_handler_t handler, void *data)
+			     enum aca_smu_type type, bank_handler_t handler, void *data)
 {
 	struct aca_handle *handle;
 	int ret;
@@ -354,7 +347,7 @@ static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *ba
 }
 
 static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
-			      enum aca_error_type type, bank_handler_t handler, void *data)
+			      enum aca_smu_type type, bank_handler_t handler, void *data)
 {
 	struct aca_bank_node *node;
 	struct aca_bank *bank;
@@ -378,8 +371,28 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *
 	return 0;
 }
 
-static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type,
-			    bank_handler_t handler, void *data)
+static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type)
+{
+	struct amdgpu_aca *aca = &adev->aca;
+	bool ret = true;
+
+	/*
+	 * Because the UE Valid MCA count will only be cleared after reset,
+	 * in order to avoid repeated counting of the error count,
+	 * the aca bank is only updated once during the gpu recovery stage.
+	 */
+	if (type == ACA_SMU_TYPE_UE) {
+		if (amdgpu_ras_intr_triggered())
+			ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0;
+		else
+			atomic_set(&aca->ue_update_flag, 0);
+	}
+
+	return ret;
+}
+
+static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
+			    bank_handler_t handler, struct ras_query_context *qctx, void *data)
 {
 	struct amdgpu_aca *aca = &adev->aca;
 	struct aca_banks banks;
@@ -389,9 +402,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type
 	if (list_empty(&aca->mgr.list))
 		return 0;
 
-	/* NOTE: pmfw is only support UE and CE */
-	if (type == ACA_ERROR_TYPE_DEFERRED)
-		type = ACA_ERROR_TYPE_CE;
+	if (!aca_bank_should_update(adev, type))
+		return 0;
 
 	ret = aca_smu_get_valid_aca_count(adev, type, &count);
 	if (ret)
@@ -402,7 +414,7 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type
 
 	aca_banks_init(&banks);
 
-	ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks);
+	ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
 	if (ret)
 		goto err_release_banks;
 
@@ -431,7 +443,7 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er
 	if (type >= ACA_ERROR_TYPE_COUNT)
 		return -EINVAL;
 
-	count = bank_error->count[type];
+	count = bank_error->count;
 	if (!count)
 		return 0;
 
@@ -447,6 +459,8 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er
 		amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count);
 		break;
 	case ACA_ERROR_TYPE_DEFERRED:
+		amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count);
+		break;
 	default:
 		break;
 	}
@@ -477,12 +491,25 @@ out_unlock:
 }
 
 static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
-				struct ras_err_data *err_data)
+				struct ras_err_data *err_data, struct ras_query_context *qctx)
 {
+	enum aca_smu_type smu_type;
 	int ret;
 
+	switch (type) {
+	case ACA_ERROR_TYPE_UE:
+		smu_type = ACA_SMU_TYPE_UE;
+		break;
+	case ACA_ERROR_TYPE_CE:
+	case ACA_ERROR_TYPE_DEFERRED:
+		smu_type = ACA_SMU_TYPE_CE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	/* udpate aca bank to aca source error_cache first */
-	ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL);
+	ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
 	if (ret)
 		return ret;
 
@@ -498,20 +525,19 @@ static bool aca_handle_is_valid(struct aca_handle *handle)
 }
 
 int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
-			      enum aca_error_type type, void *data)
+			      enum aca_error_type type, struct ras_err_data *err_data,
+			      struct ras_query_context *qctx)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)data;
-
 	if (!handle || !err_data)
 		return -EINVAL;
 
 	if (aca_handle_is_valid(handle))
 		return -EOPNOTSUPP;
 
-	if (!(BIT(type) & handle->mask))
+	if ((type < 0) || (!(BIT(type) & handle->mask)))
 		return 0;
 
-	return __aca_get_error_data(adev, handle, type, err_data);
+	return __aca_get_error_data(adev, handle, type, err_data, qctx);
 }
 
 static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
@@ -660,7 +686,8 @@ static void aca_manager_fini(struct aca_handle_manager *mgr)
 
 bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
 {
-	return adev->aca.is_enabled;
+	return (adev->aca.is_enabled ||
+		adev->debug_enable_ras_aca);
 }
 
 int amdgpu_aca_init(struct amdgpu_device *adev)
@@ -668,6 +695,8 @@ int amdgpu_aca_init(struct amdgpu_device *adev)
 	struct amdgpu_aca *aca = &adev->aca;
 	int ret;
 
+	atomic_set(&aca->ue_update_flag, 0);
+
 	ret = aca_manager_init(&aca->mgr);
 	if (ret)
 		return ret;
@@ -680,13 +709,17 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
 	struct amdgpu_aca *aca = &adev->aca;
 
 	aca_manager_fini(&aca->mgr);
+
+	atomic_set(&aca->ue_update_flag, 0);
 }
 
 int amdgpu_aca_reset(struct amdgpu_device *adev)
 {
-	amdgpu_aca_fini(adev);
+	struct amdgpu_aca *aca = &adev->aca;
 
-	return amdgpu_aca_init(adev);
+	atomic_set(&aca->ue_update_flag, 0);
+
+	return 0;
 }
 
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
@@ -723,23 +756,13 @@ int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
 
 static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
 {
-	int error_code;
-
-	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
-	case IP_VERSION(13, 0, 6):
-		if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) {
-			error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]);
-			return error_code & 0xff;
-		}
-		break;
-	default:
-		break;
-	}
+	struct amdgpu_aca *aca = &adev->aca;
+	const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
 
-	/* NOTE: the true error code is encoded in status.errorcode[0:7] */
-	error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]);
+	if (!smu_funcs || !smu_funcs->parse_error_code)
+		return -EOPNOTSUPP;
 
-	return error_code & 0xff;
+	return smu_funcs->parse_error_code(adev, bank);
 }
 
 int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
@@ -750,6 +773,9 @@ int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank
 		return -EINVAL;
 
 	error_code = aca_bank_get_error_code(adev, bank);
+	if (error_code < 0)
+		return error_code;
+
 	for (i = 0; i < size; i++) {
 		if (err_codes[i] == error_code)
 			return 0;
@@ -784,7 +810,7 @@ static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
 	return 0;
 }
 
-static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx)
+static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx)
 {
 	struct aca_bank_info info;
 	int i, ret;
@@ -793,7 +819,7 @@ static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_e
 	if (ret)
 		return;
 
-	seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE");
+	seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE");
 	seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
 		   idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
 
@@ -807,7 +833,7 @@ struct aca_dump_context {
 };
 
 static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
-				 enum aca_error_type type, void *data)
+				 enum aca_smu_type type, void *data)
 {
 	struct aca_dump_context *ctx = (struct aca_dump_context *)data;
 
@@ -816,7 +842,7 @@ static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *ban
 	return handler_aca_log_bank_error(handle, bank, type, NULL);
 }
 
-static int aca_dump_show(struct seq_file *m, enum aca_error_type type)
+static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
 	struct aca_dump_context context = {
@@ -824,12 +850,12 @@ static int aca_dump_show(struct seq_file *m, enum aca_error_type type)
 		.idx = 0,
 	};
 
-	return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context);
+	return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context);
 }
 
 static int aca_dump_ce_show(struct seq_file *m, void *unused)
 {
-	return aca_dump_show(m, ACA_ERROR_TYPE_CE);
+	return aca_dump_show(m, ACA_SMU_TYPE_CE);
 }
 
 static int aca_dump_ce_open(struct inode *inode, struct file *file)
@@ -847,7 +873,7 @@ static const struct file_operations aca_ce_dump_debug_fops = {
 
 static int aca_dump_ue_show(struct seq_file *m, void *unused)
 {
-	return aca_dump_show(m, ACA_ERROR_TYPE_UE);
+	return aca_dump_show(m, ACA_SMU_TYPE_UE);
 }
 
 static int aca_dump_ue_open(struct inode *inode, struct file *file)
@@ -869,7 +895,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_se
 void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
 {
 #if defined(CONFIG_DEBUG_FS)
-	if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
+	if (!root)
 		return;
 
 	debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 2da50e095883..5ef6b745f222 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -26,6 +26,9 @@
 
 #include <linux/list.h>
 
+struct ras_err_data;
+struct ras_query_context;
+
 #define ACA_MAX_REGS_COUNT	(16)
 
 #define ACA_REG_FIELD(x, h, l)			(((x) & GENMASK_ULL(h, l)) >> l)
@@ -99,7 +102,14 @@ enum aca_error_type {
 	ACA_ERROR_TYPE_COUNT
 };
 
+enum aca_smu_type {
+	ACA_SMU_TYPE_UE = 0,
+	ACA_SMU_TYPE_CE,
+	ACA_SMU_TYPE_COUNT,
+};
+
 struct aca_bank {
+	enum aca_smu_type type;
 	u64 regs[ACA_MAX_REGS_COUNT];
 };
 
@@ -115,15 +125,10 @@ struct aca_bank_info {
 	int mcatype;
 };
 
-struct aca_bank_report {
-	struct aca_bank_info info;
-	u64 count[ACA_ERROR_TYPE_COUNT];
-};
-
 struct aca_bank_error {
 	struct list_head node;
 	struct aca_bank_info info;
-	u64 count[ACA_ERROR_TYPE_COUNT];
+	u64 count;
 };
 
 struct aca_error {
@@ -157,9 +162,8 @@ struct aca_handle {
 };
 
 struct aca_bank_ops {
-	int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
-					struct aca_bank_report *report, void *data);
-	bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
+	int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+	bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
 				  void *data);
 };
 
@@ -167,13 +171,15 @@ struct aca_smu_funcs {
 	int max_ue_bank_count;
 	int max_ce_bank_count;
 	int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
-	int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count);
-	int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank);
+	int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+	int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+	int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
 };
 
 struct amdgpu_aca {
 	struct aca_handle_manager mgr;
 	const struct aca_smu_funcs *smu_funcs;
+	atomic_t ue_update_flag;
 	bool is_enabled;
 };
 
@@ -196,7 +202,10 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
 			  const char *name, const struct aca_info *aca_info, void *data);
 void amdgpu_aca_remove_handle(struct aca_handle *handle);
 int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
-			      enum aca_error_type type, void *data);
+			      enum aca_error_type type, struct ras_err_data *err_data,
+			      struct ras_query_context *qctx);
 int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
 void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+				   enum aca_error_type type, u64 count);
 #endif
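For context, a hypothetical sketch (not part of this commit) of the reworked callback contract: IP-specific code now supplies aca_bank_parser instead of aca_bank_generate_report, decodes the bank itself, and accounts errors through the newly exported aca_error_cache_log_bank_error(). The one-error count below is a placeholder; a real parser derives the count and class from the bank registers:

	static int example_aca_bank_parser(struct aca_handle *handle,
					   struct aca_bank *bank,
					   enum aca_smu_type type, void *data)
	{
		struct aca_bank_info info;
		enum aca_error_type err_type;
		int ret;

		ret = aca_bank_info_decode(bank, &info);
		if (ret)
			return ret;

		/* banks fetched with ACA_SMU_TYPE_CE may represent CE or
		 * deferred errors; CE is assumed here for simplicity */
		err_type = (type == ACA_SMU_TYPE_UE) ? ACA_ERROR_TYPE_UE :
						       ACA_ERROR_TYPE_CE;

		return aca_error_cache_log_bank_error(handle, &info, err_type, 1);
	}

	static const struct aca_bank_ops example_bank_ops = {
		.aca_bank_parser = example_aca_bank_parser,
	};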
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 6d72355ac492..bf6c4a0d0525 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -637,6 +637,8 @@ static const struct amd_ip_funcs acp_ip_funcs = {
 	.soft_reset = acp_soft_reset,
 	.set_clockgating_state = acp_set_clockgating_state,
 	.set_powergating_state = acp_set_powergating_state,
+	.dump_ip_state = NULL,
+	.print_ip_state = NULL,
 };
 
 const struct amdgpu_ip_block_version acp_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 7099ff9cf8c5..f85ace0384d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -383,6 +383,8 @@ static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
 		characteristics.min_input_signal;
 	atif->backlight_caps.max_input_signal =
 		characteristics.max_input_signal;
+	atif->backlight_caps.ac_level = characteristics.ac_level;
+	atif->backlight_caps.dc_level = characteristics.dc_level;
 out:
 	kfree(info);
 	return err;
@@ -1268,6 +1270,8 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps)
 	caps->caps_valid = atif->backlight_caps.caps_valid;
 	caps->min_input_signal = atif->backlight_caps.min_input_signal;
 	caps->max_input_signal = atif->backlight_caps.max_input_signal;
+	caps->ac_level = atif->backlight_caps.ac_level;
+	caps->dc_level = atif->backlight_caps.dc_level;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index a4d65973bf7c..80771b1480ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -100,6 +100,7 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
 	amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
 	amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
 	amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
+	res.clock = clock;
 
 	return res;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 35dd6effa9a3..03205e3c3746 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -133,6 +133,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
 
 	reset_context.method = AMD_RESET_METHOD_NONE;
 	reset_context.reset_req_dev = adev;
+	reset_context.src = adev->enable_mes ?
+			    AMDGPU_RESET_SRC_MES :
+			    AMDGPU_RESET_SRC_HWS;
 	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 
 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
@@ -261,12 +264,13 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
 	return r;
 }
 
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+			    struct amdgpu_reset_context *reset_context)
 {
 	int r = 0;
 
 	if (adev->kfd.dev)
-		r = kgd2kfd_pre_reset(adev->kfd.dev);
+		r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
 
 	return r;
 }
@@ -455,6 +459,9 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
 		else
 			mem_info->local_mem_size_private =
 					KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+	} else if (adev->flags & AMD_IS_APU) {
+		mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
+		mem_info->local_mem_size_private = 0;
 	} else {
 		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
 		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
@@ -747,10 +754,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
 	return amdgpu_ras_get_fed_status(adev);
 }
 
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+			enum amdgpu_ras_block block, uint16_t pasid,
+			pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+	amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
+}
+
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
-	enum amdgpu_ras_block block, bool reset)
+	enum amdgpu_ras_block block, uint32_t reset)
 {
-	amdgpu_umc_poison_handler(adev, block, reset);
+	amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
 }
 
 int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
@@ -769,12 +783,20 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
+			int hub_inst, int hub_type)
 {
-	if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
-		return adev->gfx.ras->query_utcl2_poison_status(adev);
-	else
-		return false;
+	if (!hub_type) {
+		if (adev->gfxhub.funcs->query_utcl2_poison_status)
+			return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst);
+		else
+			return false;
+	} else {
+		if (adev->mmhub.funcs->query_utcl2_poison_status)
+			return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst);
+		else
+			return false;
+	}
 }
 
 int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
@@ -809,6 +831,8 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
 		}
 		do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
 		return ALIGN_DOWN(tmp, PAGE_SIZE);
+	} else if (adev->flags & AMD_IS_APU) {
+		return (ttm_tt_pages_limit() << PAGE_SHIFT);
 	} else {
 		return adev->gmc.real_vram_size;
 	}
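A short usage note on the reworked UTCL2 poison query above: hub_type now selects which hub's callback is consulted (0 routes to adev->gfxhub.funcs, non-zero to adev->mmhub.funcs), and hub_inst is passed through as the hub instance. A hedged one-line caller sketch, with both arguments shown as 0:

	/* Sketch only: query poison status of GFX hub instance 0 */
	bool poisoned = amdgpu_amdkfd_ras_query_utcl2_poison_status(adev, 0, 0);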
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0ef223c2affb..e7bb1ca35801 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE {
 };
 
 struct amdgpu_device;
+struct amdgpu_reset_context;
 
 enum kfd_mem_attachment_type {
 	KFD_MEM_ATT_SHARED,	/* Share kgd_mem->bo or another attachment's */
@@ -170,7 +171,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+			    struct amdgpu_reset_context *reset_context);
 
 int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
 
@@ -336,12 +338,18 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
 int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
 				struct tile_config *config);
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
-			enum amdgpu_ras_block block, bool reset);
+			enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+			enum amdgpu_ras_block block, uint16_t pasid,
+			pasid_notify pasid_fn, void *data, uint32_t reset);
+
 bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
 bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
 void amdgpu_amdkfd_block_mmu_notifications(void *p);
 int amdgpu_amdkfd_criu_resume(void *p);
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
+			int hub_inst, int hub_type);
 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 		uint64_t size, u32 alloc_flag, int8_t xcp_id);
 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
@@ -410,7 +418,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_pre_reset(struct kfd_dev *kfd);
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+		      struct amdgpu_reset_context *reset_context);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
@@ -453,7 +462,8 @@ static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 	return 0;
 }
 
-static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+				    struct amdgpu_reset_context *reset_context)
 {
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 69810b3f1c63..3ab6c3aa0ad1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -881,6 +881,7 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
 }
 
 #define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
 uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
 					uint64_t watch_address,
 					uint32_t watch_address_mask,
@@ -889,55 +890,93 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
 					uint32_t debug_vmid,
 					uint32_t inst)
 {
+	/* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
+	 * same values.
+	 */
 	uint32_t watch_address_high;
 	uint32_t watch_address_low;
-	uint32_t watch_address_cntl;
-
-	watch_address_cntl = 0;
+	uint32_t tcp_watch_address_cntl;
+	uint32_t sq_watch_address_cntl;
 
 	watch_address_low = lower_32_bits(watch_address);
 	watch_address_high = upper_32_bits(watch_address) & 0xffff;
 
-	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+	tcp_watch_address_cntl = 0;
+	tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
 			TCP_WATCH0_CNTL,
 			VMID,
 			debug_vmid);
-	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+	tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
 			TCP_WATCH0_CNTL,
 			MODE,
 			watch_mode);
-	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+	tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
 			TCP_WATCH0_CNTL,
 			MASK,
 			watch_address_mask >> 7);
 
+	sq_watch_address_cntl = 0;
+	sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+			SQ_WATCH0_CNTL,
+			VMID,
+			debug_vmid);
+	sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+			SQ_WATCH0_CNTL,
+			MODE,
+			watch_mode);
+	sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+			SQ_WATCH0_CNTL,
+			MASK,
+			watch_address_mask >> 6);
+
 	/* Turning off this watch point until we set all the registers */
-	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+	tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
 			TCP_WATCH0_CNTL,
 			VALID,
 			0);
-
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
-			watch_address_cntl);
+			tcp_watch_address_cntl);
+
+	sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+			SQ_WATCH0_CNTL,
+			VALID,
+			0);
+	WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+			(watch_id * SQ_WATCH_STRIDE)),
+			sq_watch_address_cntl);
 
+	/* Program {TCP,SQ}_WATCH?_ADDR* */
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_high);
-
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_low);
 
+	WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
+			(watch_id * SQ_WATCH_STRIDE)),
+			watch_address_high);
+	WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
+			(watch_id * SQ_WATCH_STRIDE)),
+			watch_address_low);
+
 	/* Enable the watch point */
-	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+	tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
 			TCP_WATCH0_CNTL,
 			VALID,
 			1);
-
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
-			watch_address_cntl);
+			tcp_watch_address_cntl);
+
+	sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+			SQ_WATCH0_CNTL,
+			VALID,
+			1);
+	WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+			(watch_id * SQ_WATCH_STRIDE)),
+			sq_watch_address_cntl);
 
 	return 0;
 }
@@ -953,8 +992,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+			(watch_id * SQ_WATCH_STRIDE)),
+			watch_address_cntl);
+
 	return 0;
 }
+#undef TCP_WATCH_STRIDE
+#undef SQ_WATCH_STRIDE
 
 /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
new file mode 100644
index 000000000000..0dfe7093bd8a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+		      uint32_t queue, uint32_t vmid)
+{
+	mutex_lock(&adev->srbm_mutex);
+	soc24_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+			  uint32_t queue_id)
+{
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+	unlock_srbm(adev);
+}
+
+static int init_interrupts_v12(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst)
+{
+	uint32_t mec;
+	uint32_t pipe;
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	lock_srbm(adev, mec, pipe, 0, 0);
+
+	WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+	unlock_srbm(adev);
+
+	return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+				unsigned int engine_id,
+				unsigned int queue_id)
+{
+	uint32_t sdma_engine_reg_base = 0;
+	uint32_t sdma_rlc_reg_offset;
+
+	switch (engine_id) {
+	case 0:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+				regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+		break;
+	case 1:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+				regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+		break;
+	default:
+		BUG();
+	}
+
+	sdma_rlc_reg_offset = sdma_engine_reg_base +
+		queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+		 queue_id, sdma_rlc_reg_offset);
+
+	return sdma_rlc_reg_offset;
+}
+
+static int hqd_dump_v12(struct amdgpu_device *adev,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+	uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(adev, pipe_id, queue_id);
+
+	for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+	     reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+		DUMP_REG(reg);
+
+	release_queue(adev);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int hqd_sdma_dump_v12(struct amdgpu_device *adev,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+			engine_id, queue_id);
+	uint32_t i = 0, reg;
+
+	const uint32_t first_reg = regSDMA0_QUEUE0_RB_CNTL;
+	const uint32_t last_reg = regSDMA0_QUEUE0_CONTEXT_STATUS;
+#undef HQD_N_REGS
+#define HQD_N_REGS (last_reg - first_reg + 1)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = first_reg;
+	     reg <= last_reg; reg++)
+		DUMP_REG(sdma_rlc_reg_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int wave_control_execute_v12(struct amdgpu_device *adev,
+				    uint32_t gfx_index_val,
+				    uint32_t sq_cmd, uint32_t inst)
+{
+	uint32_t data = 0;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+	WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		INSTANCE_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SA_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SE_BROADCAST_WRITES, 1);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_enable_debug_trap(struct amdgpu_device *adev,
+					      bool restore_dbg_registers,
+					      uint32_t vmid)
+{
+	uint32_t data = 0;
+
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_disable_debug_trap(struct amdgpu_device *adev,
+					       bool keep_trap_enabled,
+					       uint32_t vmid)
+{
+	uint32_t data = 0;
+
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return data;
+}
+
+static int kgd_gfx_v12_validate_trap_override_request(struct amdgpu_device *adev,
+						      uint32_t trap_override,
+						      uint32_t *trap_mask_supported)
+{
+	*trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+				KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+				KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+				KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+				KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+				KFD_DBG_TRAP_MASK_FP_INEXACT |
+				KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+				KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+				KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+				KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+				KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+
+	if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+	    trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+		return -EPERM;
+
+	return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+	uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+	uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+	uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+			KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+			KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+			KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+			KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+			KFD_DBG_TRAP_MASK_FP_INEXACT |
+			KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+			KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+			KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+	uint32_t ret;
+
+	ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+	ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+	ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+	return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+	uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+	if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+		ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+	if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+		ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+	return ret;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_set_wave_launch_trap_override(struct amdgpu_device *adev,
+					uint32_t vmid,
+					uint32_t trap_override,
+					uint32_t trap_mask_bits,
+					uint32_t trap_mask_request,
+					uint32_t *trap_mask_prev,
+					uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+	uint32_t data = 0;
+
+	*trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+	data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+	data = trap_mask_map_sw_to_hw(data);
+
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+	return data;
+}
+
+/* returns STALL_VMID or LAUNCH_MODE. */
+static uint32_t kgd_gfx_v12_set_wave_launch_mode(struct amdgpu_device *adev,
+					uint8_t wave_launch_mode,
+					uint32_t vmid)
+{
+	uint32_t data = 0;
+	bool is_stall_mode = wave_launch_mode == 4;
+
+	if (is_stall_mode)
+		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, STALL_VMID,
+				     1);
+	else
+		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
+				     wave_launch_mode);
+
+	return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v12_set_address_watch(struct amdgpu_device *adev,
+					uint64_t watch_address,
+					uint32_t watch_address_mask,
+					uint32_t watch_id,
+					uint32_t watch_mode,
+					uint32_t debug_vmid,
+					uint32_t inst)
+{
+	uint32_t watch_address_high;
+	uint32_t watch_address_low;
+	uint32_t watch_address_cntl;
+
+	watch_address_cntl = 0;
+	watch_address_low = lower_32_bits(watch_address);
+	watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+			TCP_WATCH0_CNTL,
+			MODE,
+			watch_mode);
+
+	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+			TCP_WATCH0_CNTL,
+			MASK,
+			watch_address_mask >> 7);
+
+	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+			TCP_WATCH0_CNTL,
+			VALID,
+			1);
+
+	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+			(watch_id * TCP_WATCH_STRIDE)),
+			watch_address_high);
+
+	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+			(watch_id * TCP_WATCH_STRIDE)),
+			watch_address_low);
+
+	return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev,
+						uint32_t watch_id)
+{
+	return 0;
+}
+
+const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
+	.init_interrupts = init_interrupts_v12,
+	.hqd_dump = hqd_dump_v12,
+	.hqd_sdma_dump = hqd_sdma_dump_v12,
+	.wave_control_execute = wave_control_execute_v12,
+	.get_atc_vmid_pasid_mapping_info = NULL,
+	.enable_debug_trap = kgd_gfx_v12_enable_debug_trap,
+	.disable_debug_trap = kgd_gfx_v12_disable_debug_trap,
+	.validate_trap_override_request = kgd_gfx_v12_validate_trap_override_request,
+	.set_wave_launch_trap_override = kgd_gfx_v12_set_wave_launch_trap_override,
+	.set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode,
+	.set_address_watch = kgd_gfx_v12_set_address_watch,
+	.clear_address_watch = kgd_gfx_v12_clear_address_watch,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index df58a6a1a67e..11672bfe4fad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -172,6 +172,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 {
 	uint64_t reserved_for_pt =
 		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
 	size_t system_mem_needed, ttm_mem_needed, vram_needed;
 	int ret = 0;
 	uint64_t vram_size = 0;
@@ -196,7 +198,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 			return -EINVAL;
 
 		vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
-		if (adev->gmc.is_app_apu) {
+		if (adev->flags & AMD_IS_APU) {
 			system_mem_needed = size;
 			ttm_mem_needed = size;
 		}
@@ -220,7 +222,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 	     kfd_mem_limit.max_ttm_mem_limit) ||
 	    (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
-	     vram_size - reserved_for_pt)) {
+	     vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
 		ret = -ENOMEM;
 		goto release;
 	}
@@ -232,7 +234,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 		 "adev reference can't be null when vram is used");
 	if (adev && xcp_id >= 0) {
 		adev->kfd.vram_used[xcp_id] += vram_needed;
-		adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+		adev->kfd.vram_used_aligned[xcp_id] +=
+				(adev->flags & AMD_IS_APU) ?
 				vram_needed :
 				ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
 	}
@@ -260,7 +263,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
 
 	if (adev) {
 		adev->kfd.vram_used[xcp_id] -= size;
-		if (adev->gmc.is_app_apu) {
+		if (adev->flags & AMD_IS_APU) {
 			adev->kfd.vram_used_aligned[xcp_id] -= size;
 			kfd_mem_limit.system_mem_used -= size;
 			kfd_mem_limit.ttm_mem_used -= size;
@@ -414,6 +417,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
 		 "Called with userptr BO"))
 		return -EINVAL;
 
+	/* bo has been pinned, not need validate it */
+	if (bo->tbo.pin_count)
+		return 0;
+
 	amdgpu_bo_placement_from_domain(bo, domain);
 
 	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -889,7 +896,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	 * if peer device has large BAR. In contrast, access over xGMI is
 	 * allowed for both small and large BAR configurations of peer device
 	 */
-	if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
+	if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) &&
 	    ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
 	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
 	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -1087,7 +1094,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 
 	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
 	if (ret) {
-		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+		if (ret == -EAGAIN)
+			pr_debug("Failed to get user pages, try again\n");
+		else
+			pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
 		goto unregister_out;
 	}
 
@@ -1188,7 +1198,8 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	int ret;
 
 	ctx->sync = &mem->sync;
-	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&ctx->exec) {
 		ctx->n_vms = 0;
 		list_for_each_entry(entry, &mem->attachments, list) {
@@ -1470,13 +1481,30 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
 	if (unlikely(ret))
 		return ret;
 
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+		/*
+		 * If bo is not contiguous on VRAM, move to system memory first to ensure
+		 * we can get contiguous VRAM space after evicting other BOs.
+		 */
+		if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+			struct ttm_operation_ctx ctx = { true, false };
+
+			amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+			if (unlikely(ret)) {
+				pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
+				goto out;
+			}
+		}
+	}
+
 	ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
 	if (ret)
 		pr_err("Error in Pinning BO to domain: %d\n", domain);
 
 	amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
 	amdgpu_bo_unreserve(bo);
-
 	return ret;
 }
 
@@ -1647,6 +1675,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
 {
 	uint64_t reserved_for_pt =
 		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
 	ssize_t available;
 	uint64_t vram_available, system_mem_available, ttm_mem_available;
 
@@ -1654,9 +1684,10 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
 	vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
 		- adev->kfd.vram_used_aligned[xcp_id]
 		- atomic64_read(&adev->vram_pin_size)
-		- reserved_for_pt;
+		- reserved_for_pt
+		- reserved_for_ras;
 
-	if (adev->gmc.is_app_apu) {
+	if (adev->flags & AMD_IS_APU) {
 		system_mem_available = no_system_mem_limit ?
 					kfd_mem_limit.max_system_mem_limit :
 					kfd_mem_limit.max_system_mem_limit -
@@ -1704,7 +1735,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
-		if (adev->gmc.is_app_apu) {
+		if (adev->flags & AMD_IS_APU) {
 			domain = AMDGPU_GEM_DOMAIN_GTT;
 			alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 			alloc_flags = 0;
@@ -1712,6 +1743,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 			alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 			alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+			/* For contiguous VRAM allocation */
+			if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+				alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		}
 		xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
 					0 : fpriv->xcp_id;
@@ -1854,6 +1889,7 @@ err_node_allow:
 err_bo_create:
 	amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
 err_reserve_limit:
+	amdgpu_sync_free(&(*mem)->sync);
 	mutex_destroy(&(*mem)->lock);
 	if (gobj)
 		drm_gem_object_put(gobj);
@@ -1950,7 +1986,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	if (size) {
 		if (!is_imported &&
 		   (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
-		   (adev->gmc.is_app_apu &&
+		   ((adev->flags & AMD_IS_APU) &&
 		    mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
 			*size = bo_size;
 		else
@@ -2372,8 +2408,9 @@ static int import_obj_create(struct amdgpu_device *adev,
 	(*mem)->dmabuf = dma_buf;
 	(*mem)->bo = bo;
 	(*mem)->va = va;
-	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
-			 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
+			 !(adev->flags & AMD_IS_APU) ?
+			 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
 
 	(*mem)->mapped_to_gpu_memory = 0;
 	(*mem)->process_info = avm->process_info;
@@ -2708,7 +2745,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_i
 
 		/* keep mem without hmm range at userptr_inval_list */
 		if (!mem->range)
-			 continue;
+			continue;
 
 		/* Only check mem with hmm range associated */
 		valid = amdgpu_ttm_tt_get_user_pages_done(
@@ -2900,13 +2937,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
 
 	amdgpu_sync_create(&sync_obj);
 
-	/* Validate BOs and map them to GPUVM (update VM page tables). */
+	/* Validate BOs managed by KFD */
 	list_for_each_entry(mem, &process_info->kfd_bo_list,
 			    validate_list) {
 
 		struct amdgpu_bo *bo = mem->bo;
 		uint32_t domain = mem->domain;
-		struct kfd_mem_attachment *attachment;
 		struct dma_resv_iter cursor;
 		struct dma_fence *fence;
 
@@ -2931,20 +2967,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
 				goto validate_map_fail;
 			}
 		}
-		list_for_each_entry(attachment, &mem->attachments, list) {
-			if (!attachment->is_mapped)
-				continue;
-
-			if (attachment->bo_va->base.bo->tbo.pin_count)
-				continue;
-
-			kfd_mem_dmaunmap_attachment(mem, attachment);
-			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
-			if (ret) {
-				pr_debug("Memory eviction: update PTE failed. Try again\n");
-				goto validate_map_fail;
-			}
-		}
 	}
 
 	if (failed_size)
@@ -2959,6 +2981,24 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
 		goto validate_map_fail;
 	}
 
+	/* Update mappings managed by KFD. */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+			    validate_list) {
+		struct kfd_mem_attachment *attachment;
+
+		list_for_each_entry(attachment, &mem->attachments, list) {
+			if (!attachment->is_mapped)
+				continue;
+
+			kfd_mem_dmaunmap_attachment(mem, attachment);
+			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+			if (ret) {
+				pr_debug("Memory eviction: update PTE failed. Try again\n");
+				goto validate_map_fail;
+			}
+		}
+	}
+
 	/* Update mappings not managed by KFD */
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			vm_list_node) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 52b12c1718eb..7dc102f0bc1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1484,6 +1484,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
 									(u32)le32_to_cpu(*((u32 *)reg_data + j));
 							j++;
 						} else if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_EQU_PREV) {
+							if (i == 0)
+								continue;
 							reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
 								reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
 						}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 6857c586ded7..f873dd3cae16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -34,6 +34,7 @@ union firmware_info {
 	struct atom_firmware_info_v3_2 v32;
 	struct atom_firmware_info_v3_3 v33;
 	struct atom_firmware_info_v3_4 v34;
+	struct atom_firmware_info_v3_5 v35;
 };
 
 /*
@@ -211,6 +212,7 @@ union igp_info {
 	struct atom_integrated_system_info_v1_11 v11;
 	struct atom_integrated_system_info_v1_12 v12;
 	struct atom_integrated_system_info_v2_1 v21;
+	struct atom_integrated_system_info_v2_3 v23;
 };
 
 union umc_info {
@@ -288,7 +290,6 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
 	return vram_type;
 }
 
-
 int
 amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
 				  int *vram_width, int *vram_type,
@@ -299,6 +300,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
 	u16 data_offset, size;
 	union igp_info *igp_info;
 	union vram_info *vram_info;
+	union umc_info *umc_info;
 	union vram_module *vram_module;
 	u8 frev, crev;
 	u8 mem_type;
@@ -310,10 +312,16 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
 	if (adev->flags & AMD_IS_APU)
 		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
 						    integratedsysteminfo);
-	else
-		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-						    vram_info);
-
+	else {
+		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+		case IP_VERSION(12, 0, 0):
+		case IP_VERSION(12, 0, 1):
+			index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
+			break;
+		default:
+			index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
+		}
+	}
 	if (amdgpu_atom_parse_data_header(mode_info->atom_context,
 					  index, &size,
 					  &frev, &crev, &data_offset)) {
@@ -359,6 +367,20 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
 				if (vram_type)
 					*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
 				break;
+			case 3:
+				mem_channel_number = igp_info->v23.umachannelnumber;
+				if (!mem_channel_number)
+					mem_channel_number = 1;
+				mem_type = igp_info->v23.memorytype;
+				if (mem_type == LpDdr5MemType)
+					mem_channel_width = 32;
+				else
+					mem_channel_width = 64;
+				if (vram_width)
+					*vram_width = mem_channel_number * mem_channel_width;
+				if (vram_type)
+					*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+				break;
 			default:
 				return -EINVAL;
 			}
@@ -367,123 +389,150 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
 			return -EINVAL;
 		}
 	} else {
 		vram_info = (union vram_info *)
 			(mode_info->atom_context->bios + data_offset);
 		module_id = (RREG32(adev->bios_scratch_reg_offset + 4) &
0x00ff0000) >> 16; - if (frev == 3) { - switch (crev) { - /* v30 */ - case 0: - vram_module = (union vram_module *)vram_info->v30.vram_module; - mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - mem_type = vram_info->v30.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_info->v30.channel_num; - mem_channel_width = vram_info->v30.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - break; - default: - return -EINVAL; - } - } else if (frev == 2) { - switch (crev) { - /* v23 */ - case 3: - if (module_id > vram_info->v23.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v23.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v9.vram_module_size); - i++; - } - mem_type = vram_module->v9.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v9.channel_num; - mem_channel_width = vram_module->v9.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; - /* v24 */ - case 4: - if (module_id > vram_info->v24.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v24.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v10.vram_module_size); - i++; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset); + + if (frev == 4) { + switch (crev) { + case 0: + mem_channel_number = le32_to_cpu(umc_info->v40.channel_num); + mem_type = le32_to_cpu(umc_info->v40.vram_type); + mem_channel_width = le32_to_cpu(umc_info->v40.channel_width); + mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + break; + default: + return -EINVAL; } - mem_type = vram_module->v10.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v10.channel_num; - mem_channel_width = vram_module->v10.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; - /* v25 */ - case 5: - if (module_id > vram_info->v25.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v25.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v11.vram_module_size); - i++; + } else + return -EINVAL; + break; + default: + vram_info = (union vram_info *) + (mode_info->atom_context->bios + data_offset); + + module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16; + if (frev == 3) { + switch (crev) { + /* v30 */ + case 0: + vram_module = (union vram_module *)vram_info->v30.vram_module; + mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + mem_type = vram_info->v30.memory_type; 
+ if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_info->v30.channel_num; + mem_channel_width = vram_info->v30.channel_width; + if (vram_width) + *vram_width = mem_channel_number * 16; + break; + default: + return -EINVAL; } - mem_type = vram_module->v11.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v11.channel_num; - mem_channel_width = vram_module->v11.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v11.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; - /* v26 */ - case 6: - if (module_id > vram_info->v26.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v26.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v9.vram_module_size); - i++; + } else if (frev == 2) { + switch (crev) { + /* v23 */ + case 3: + if (module_id > vram_info->v23.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v23.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v9.vram_module_size); + i++; + } + mem_type = vram_module->v9.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v9.channel_num; + mem_channel_width = vram_module->v9.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + /* v24 */ + case 4: + if (module_id > vram_info->v24.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v24.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v10.vram_module_size); + i++; + } + mem_type = vram_module->v10.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v10.channel_num; + mem_channel_width = vram_module->v10.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + /* v25 */ + case 5: + if (module_id > vram_info->v25.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v25.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v11.vram_module_size); + i++; + } + mem_type = vram_module->v11.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v11.channel_num; + mem_channel_width = vram_module->v11.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v11.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + /* v26 */ + case 6: + if (module_id > vram_info->v26.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v26.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v9.vram_module_size); + i++; + } + mem_type = vram_module->v9.memory_type; + if (vram_type) + *vram_type = 
convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v9.channel_num; + mem_channel_width = vram_module->v9.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + default: + return -EINVAL; } - mem_type = vram_module->v9.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v9.channel_num; - mem_channel_width = vram_module->v9.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; - default: + } else { + /* invalid frev */ return -EINVAL; } - } else { - /* invalid frev */ - return -EINVAL; } } - } return 0; @@ -872,6 +921,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) fw_reserved_fb_size = (firmware_info->v34.fw_reserved_size_in_kb << 10); break; + case 5: + fw_reserved_fb_size = + (firmware_info->v35.fw_reserved_size_in_kb << 10); + break; default: fw_reserved_fb_size = 0; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index edc6377ec5ff..199693369c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false, false, false); + false, false, 0); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index b8280be6225d..c3d89088123d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -213,6 +213,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, struct amdgpu_firmware_info *ucode; id = fw_type_convert(cgs_device, type); + if (id >= AMDGPU_UCODE_ID_MAXIMUM) + return -EINVAL; + ucode = &adev->firmware.ucode[id]; if (ucode->fw == NULL) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 9caba10315a8..cae7479c3ecf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -246,22 +246,6 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, return NULL; } -struct edid *amdgpu_connector_edid(struct drm_connector *connector) -{ - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - struct drm_property_blob *edid_blob = connector->edid_blob_ptr; - - if (amdgpu_connector->edid) { - return amdgpu_connector->edid; - } else if (edid_blob) { - struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL); - - if (edid) - amdgpu_connector->edid = edid; - } - return amdgpu_connector->edid; -} - static struct edid * amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h index 61fcef15ad72..eff833b6ed31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h @@ -24,7 
+24,6 @@ #ifndef __AMDGPU_CONNECTORS_H__ #define __AMDGPU_CONNECTORS_H__ -struct edid *amdgpu_connector_edid(struct drm_connector *connector); void amdgpu_connector_hotplug(struct drm_connector *connector); int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector); u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0a4b09709cfb..916b6b8cf7d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -819,7 +819,7 @@ retry: p->bytes_moved += ctx.bytes_moved; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -1093,6 +1093,21 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) unsigned int i; int r; + /* + * We can't use gang submit with reserved VMIDs when the VM changes + * can't be invalidated by more than one engine at the same time. + */ + if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) { + for (i = 0; i < p->gang_size; ++i) { + struct drm_sched_entity *entity = p->entities[i]; + struct drm_gpu_scheduler *sched = entity->rq->sched; + struct amdgpu_ring *ring = to_amdgpu_ring(sched); + + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + return -EINVAL; + } + } + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f5d0fa207a88..0e1a11b6b989 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -918,7 +918,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==1 */ config[no_regs++] = adev->rev_id; - config[no_regs++] = lower_32_bits(adev->pg_flags); + config[no_regs++] = adev->pg_flags; config[no_regs++] = lower_32_bits(adev->cg_flags); /* rev==2 */ @@ -935,7 +935,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, config[no_regs++] = adev->flags & AMD_IS_APU ?
1 : 0; /* rev==5 PG/CG flag upper 32bit */ - config[no_regs++] = upper_32_bits(adev->pg_flags); + config[no_regs++] = 0; config[no_regs++] = upper_32_bits(adev->cg_flags); while (size && (*pos < no_regs * 4)) { @@ -2065,12 +2065,13 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; char reg_offset[11]; uint32_t *new = NULL, *tmp = NULL; - int ret, i = 0, len = 0; + unsigned int len = 0; + int ret, i = 0; do { memset(reg_offset, 0, 11); if (copy_from_user(reg_offset, buf + len, - min(10, ((int)size-len)))) { + min(10, (size-len)))) { ret = -EFAULT; goto error_free; } @@ -2185,6 +2186,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]); } + if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog) + amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); + amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c new file mode 100644 index 000000000000..f0a44d0dec27 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <generated/utsrelease.h> +#include <linux/devcoredump.h> +#include "amdgpu_dev_coredump.h" +#include "atom.h" + +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else + +const char *hw_ip_names[MAX_HWIP] = { + [GC_HWIP] = "GC", + [HDP_HWIP] = "HDP", + [SDMA0_HWIP] = "SDMA0", + [SDMA1_HWIP] = "SDMA1", + [SDMA2_HWIP] = "SDMA2", + [SDMA3_HWIP] = "SDMA3", + [SDMA4_HWIP] = "SDMA4", + [SDMA5_HWIP] = "SDMA5", + [SDMA6_HWIP] = "SDMA6", + [SDMA7_HWIP] = "SDMA7", + [LSDMA_HWIP] = "LSDMA", + [MMHUB_HWIP] = "MMHUB", + [ATHUB_HWIP] = "ATHUB", + [NBIO_HWIP] = "NBIO", + [MP0_HWIP] = "MP0", + [MP1_HWIP] = "MP1", + [UVD_HWIP] = "UVD/JPEG/VCN", + [VCN1_HWIP] = "VCN1", + [VCE_HWIP] = "VCE", + [VPE_HWIP] = "VPE", + [DF_HWIP] = "DF", + [DCE_HWIP] = "DCE", + [OSSSYS_HWIP] = "OSSSYS", + [SMUIO_HWIP] = "SMUIO", + [PWR_HWIP] = "PWR", + [NBIF_HWIP] = "NBIF", + [THM_HWIP] = "THM", + [CLK_HWIP] = "CLK", + [UMC_HWIP] = "UMC", + [RSMU_HWIP] = "RSMU", + [XGMI_HWIP] = "XGMI", + [DCI_HWIP] = "DCI", + [PCIE_HWIP] = "PCIE", +}; + +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, + struct drm_printer *p) +{ + uint32_t version; + uint32_t feature; + uint8_t smu_program, smu_major, smu_minor, smu_debug; + struct atom_context *ctx = adev->mode_info.atom_context; + + drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n", + adev->vce.fb_version, adev->vce.fw_version); + drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0, + adev->uvd.fw_version); + drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0, + adev->gmc.fw_version); + drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n", + adev->gfx.me_feature_version, adev->gfx.me_fw_version); + drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n", + adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version); + drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n", + adev->gfx.ce_feature_version, adev->gfx.ce_fw_version); + drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version); + + drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlc_feature_version, + adev->gfx.rlc_srlc_fw_version); + drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlg_feature_version, + adev->gfx.rlc_srlg_fw_version); + drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srls_feature_version, + adev->gfx.rlc_srls_fw_version); + drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcp_ucode_feature_version, + adev->gfx.rlcp_ucode_version); + drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcv_ucode_feature_version, + adev->gfx.rlcv_ucode_version); + drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec_feature_version, adev->gfx.mec_fw_version); + + if (adev->gfx.mec2_fw) + drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec2_feature_version, + adev->gfx.mec2_fw_version); + + drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0, + adev->gfx.imu_fw_version); + drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n", + adev->psp.sos.feature_version, adev->psp.sos.fw_version); + drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n", + adev->psp.asd_context.bin_desc.feature_version, + 
adev->psp.asd_context.bin_desc.fw_version); + + drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.xgmi_context.context.bin_desc.feature_version, + adev->psp.xgmi_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.ras_context.context.bin_desc.feature_version, + adev->psp.ras_context.context.bin_desc.fw_version); + drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.hdcp_context.context.bin_desc.feature_version, + adev->psp.hdcp_context.context.bin_desc.fw_version); + drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.dtm_context.context.bin_desc.feature_version, + adev->psp.dtm_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.rap_context.context.bin_desc.feature_version, + adev->psp.rap_context.context.bin_desc.fw_version); + drm_printf(p, + "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.securedisplay_context.context.bin_desc.feature_version, + adev->psp.securedisplay_context.context.bin_desc.fw_version); + + /* SMC firmware */ + version = adev->pm.fw_version; + + smu_program = (version >> 24) & 0xff; + smu_major = (version >> 16) & 0xff; + smu_minor = (version >> 8) & 0xff; + smu_debug = (version >> 0) & 0xff; + drm_printf(p, + "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n", + 0, smu_program, version, smu_major, smu_minor, smu_debug); + + /* SDMA firmware */ + for (int i = 0; i < adev->sdma.num_instances; i++) { + drm_printf(p, + "SDMA%d feature version: %u, firmware version: 0x%08x\n", + i, adev->sdma.instance[i].feature_version, + adev->sdma.instance[i].fw_version); + } + + drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0, + adev->vcn.fw_version); + drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcu_fw_version); + drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcub_fw_version); + drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n", + adev->psp.toc.feature_version, adev->psp.toc.fw_version); + + version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n", + feature, version); + + version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature, + version); + + drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n", + adev->vpe.feature_version, adev->vpe.fw_version); + + drm_printf(p, "\nVBIOS Information\n"); + drm_printf(p, "vbios name : %s\n", ctx->name); + drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn); + drm_printf(p, "vbios version : %d\n", ctx->version); + drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str); + drm_printf(p, "vbios date : %s\n", ctx->date); +} + +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + struct amdgpu_vm_fault_info *fault_info; + int i, ver; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = 
drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + /* SOC Information */ + drm_printf(&p, "\nSOC Information\n"); + drm_printf(&p, "SOC Device id: %d\n", coredump->adev->pdev->device); + drm_printf(&p, "SOC PCI Revision id: %d\n", coredump->adev->pdev->revision); + drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); + drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); + drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id); + + /* Memory Information */ + drm_printf(&p, "\nSOC Memory Information\n"); + drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size); + drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size); + drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); + + /* GDS Config */ + drm_printf(&p, "\nGDS Config\n"); + drm_printf(&p, "gds: total size: %d\n", coredump->adev->gds.gds_size); + drm_printf(&p, "gds: compute partition size: %d\n", coredump->adev->gds.gds_size); + drm_printf(&p, "gds: gws per compute partition: %d\n", coredump->adev->gds.gws_size); + drm_printf(&p, "gds: oa per compute partition: %d\n", coredump->adev->gds.oa_size); + + /* HWIP Version Information */ + drm_printf(&p, "\nHW IP Version Information\n"); + for (int i = 1; i < MAX_HWIP; i++) { + for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { + ver = coredump->adev->ip_versions[i][j]; + if (ver) + drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", + hw_ip_names[i], i, j, + IP_VERSION_MAJ(ver), + IP_VERSION_MIN(ver), + IP_VERSION_REV(ver), + IP_VERSION_VARIANT(ver), + IP_VERSION_SUBREV(ver)); + } + } + + /* IP firmware information */ + drm_printf(&p, "\nIP Firmwares\n"); + amdgpu_devcoredump_fw_info(coredump->adev, &p); + + if (coredump->ring) { + drm_printf(&p, "\nRing timed out details\n"); + drm_printf(&p, "IP Type: %d Ring Name: %s\n", + coredump->ring->funcs->type, + coredump->ring->name); + } + + /* Add page fault information */ + fault_info = &coredump->adev->vm_manager.fault_info; + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ?
"mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status); + + /* dump the ip state for each ip */ + drm_printf(&p, "IP Dump\n"); + for (int i = 0; i < coredump->adev->num_ip_blocks; i++) { + if (coredump->adev->ip_blocks[i].version->funcs->print_ip_state) { + drm_printf(&p, "IP: %s\n", + coredump->adev->ip_blocks[i] + .version->funcs->name); + coredump->adev->ip_blocks[i] + .version->funcs->print_ip_state( + (void *)coredump->adev, &p); + drm_printf(&p, "\n"); + } + } + + /* Add ring buffer information */ + drm_printf(&p, "Ring buffer information\n"); + for (int i = 0; i < coredump->adev->num_rings; i++) { + int j = 0; + struct amdgpu_ring *ring = coredump->adev->rings[i]; + + drm_printf(&p, "ring name: %s\n", ring->name); + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", + amdgpu_ring_get_rptr(ring), + amdgpu_ring_get_wptr(ring), + ring->buf_mask); + drm_printf(&p, "Ring size in dwords: %d\n", + ring->ring_size / 4); + drm_printf(&p, "Ring contents\n"); + drm_printf(&p, "Offset \t Value\n"); + + while (j < ring->ring_size) { + drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]); + j += 4; + } + } + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + struct amdgpu_job *job = reset_context->job; + struct drm_sched_job *s_job; + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) { + struct amdgpu_task_info *ti; + struct amdgpu_vm *vm = reset_context->job->vm; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + coredump->reset_task_info = *ti; + amdgpu_vm_put_task_info(ti); + } + } + + if (job) { + s_job = &job->base; + coredump->ring = to_amdgpu_ring(s_job->sched); + } + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h new file mode 100644 index 000000000000..52459512cb2b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DEV_COREDUMP_H__ +#define __AMDGPU_DEV_COREDUMP_H__ + +#include "amdgpu.h" +#include "amdgpu_reset.h" + +#ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; + struct amdgpu_ring *ring; +}; +#endif + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5dc24c971b41..bcacf2e35eba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -74,6 +74,7 @@ #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" #include "amdgpu_virt.h" +#include "amdgpu_dev_coredump.h" #include <linux/suspend.h> #include <drm/task_barrier.h> @@ -143,6 +144,8 @@ const char *amdgpu_asic_name[] = { "LAST", }; +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); + /** * DOC: pcie_replay_count * @@ -335,16 +338,93 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * * @dev: drm_device pointer * - * Returns true if the device supporte BACO, - * otherwise return false. + * Return: + * 1 if the device supports BACO; + * 3 if the device supports MACO (only works if BACO is supported) + * otherwise return 0.
*/ -bool amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); return amdgpu_asic_supports_baco(adev); } +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) +{ + struct drm_device *dev; + int bamaco_support; + + dev = adev_to_drm(adev); + + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; + bamaco_support = amdgpu_device_supports_baco(dev); + + switch (amdgpu_runtime_pm) { + case 2: + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Forcing BAMACO for runtime pm\n"); + } else if (bamaco_support == BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n"); + } + break; + case 1: + if (bamaco_support & BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Forcing BACO for runtime pm\n"); + } + break; + case -1: + case -2: + if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; + dev_info(adev->dev, "Using ATPX for runtime pm\n"); + } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; + dev_info(adev->dev, "Using BOCO for runtime pm\n"); + } else { + if (!bamaco_support) + goto no_runtime_pm; + + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + /* BACO are not supported on vega20 and arctrus */ + break; + case CHIP_VEGA10: + /* enable BACO as runpm mode if noretry=0 */ + if (!adev->gmc.noretry) + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + default: + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + } + + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Using BAMACO for runtime pm\n"); + } else { + dev_info(adev->dev, "Using BACO for runtime pm\n"); + } + } + } + break; + case 0: + dev_info(adev->dev, "runtime pm is manually disabled\n"); + break; + default: + break; + } + +no_runtime_pm: + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) + dev_info(adev->dev, "Runtime PM not available\n"); +} /** * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support @@ -599,7 +679,7 @@ uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, GC_HWIP, false, &rlcg_flag)) { - ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id); + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id)); } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { @@ -730,7 +810,7 @@ void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, GC_HWIP, true, &rlcg_flag)) { - amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id); + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id)); } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { @@ -1228,6 +1308,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) amdgpu_asic_pre_asic_init(adev); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { 
amdgpu_psp_wait_for_bootloader(adev); ret = amdgpu_atomfirmware_asic_init(adev, true); @@ -1402,13 +1483,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) */ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) { - unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); + unsigned long flags, offset; + spin_lock_irqsave(&adev->wb.lock, flags); + offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); *wb = offset << 3; /* convert to dw offset */ return 0; } else { + spin_unlock_irqrestore(&adev->wb.lock, flags); return -EINVAL; } } @@ -1423,9 +1508,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { + unsigned long flags; + wb >>= 3; + spin_lock_irqsave(&adev->wb.lock, flags); if (wb < adev->wb.num_wb) __clear_bit(wb, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); } /** @@ -1455,7 +1544,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) - DRM_WARN("System can't access extended configuration space,please check!!\n"); + DRM_WARN("System can't access extended configuration space, please check!!\n"); /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && @@ -2261,7 +2350,6 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[40]; int err; const struct gpu_info_firmware_header_v1_0 *hdr; @@ -2295,12 +2383,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) break; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, + "amdgpu/%s_gpu_info.bin", chip_name); if (err) { dev_err(adev->dev, - "Failed to get gpu_info firmware \"%s\"\n", - fw_name); + "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n", + chip_name); goto out; } @@ -3054,7 +3142,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return r; } - amdgpu_ras_set_error_query_ready(adev, true); + if (!amdgpu_in_reset(adev)) + amdgpu_ras_set_error_query_ready(adev, true); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); @@ -3960,6 +4049,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); + mutex_init(&adev->virt.rlcg_reg_lock); hash_init(adev->mn_hash); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); @@ -3981,6 +4071,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); + spin_lock_init(&adev->wb.lock); INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); @@ -4069,6 +4160,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + /* VF MMIO access (except mailbox range) from CPU + * will be blocked 
during sriov runtime + */ + adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { @@ -4135,18 +4233,22 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ip_blocks[i].status.hw = true; } } + } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) { + r = psp_gpu_reset(adev); } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - if (r) { - dev_err(adev->dev, "asic reset on init failed\n"); - goto failed; - } + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + } + + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; } } @@ -4539,6 +4641,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) goto unprepare; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -4909,7 +5013,8 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) shadow = vmbo->shadow; /* No need to recover an evicted BO */ - if (shadow->tbo.resource->mem_type != TTM_PL_TT || + if (!shadow->tbo.resource || + shadow->tbo.resource->mem_type != TTM_PL_TT || shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) continue; @@ -4953,27 +5058,30 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu_device pointer - * @from_hypervisor: request from hypervisor + * @reset_context: amdgpu reset context pointer * * do VF FLR and reinitialize Asic * return 0 means succeeded otherwise failed */ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, - bool from_hypervisor) + struct amdgpu_reset_context *reset_context) { int r; struct amdgpu_hive_info *hive = NULL; - int retry_limit = 0; -retry: - amdgpu_amdkfd_pre_reset(adev); - - if (from_hypervisor) + if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) { + if (!amdgpu_ras_get_fed_status(adev)) + amdgpu_virt_ready_to_reset(adev); + amdgpu_virt_wait_reset(adev); + clear_bit(AMDGPU_HOST_FLR, &reset_context->flags); r = amdgpu_virt_request_full_gpu(adev, true); - else + } else { r = amdgpu_virt_reset_gpu(adev); + } if (r) return r; + + amdgpu_ras_set_fed(adev, false); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -4982,7 +5090,7 @@ retry: /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) - goto error; + return r; amdgpu_virt_init_data_exchange(adev); @@ -4993,38 +5101,41 @@ retry: /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); if (r) - goto error; + return r; hive = amdgpu_get_xgmi_hive(adev); /* Update PSP FW topology after reset */ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) r = amdgpu_xgmi_update_topology(hive, adev); - if (hive) amdgpu_put_xgmi_hive(hive); + if (r) + return r; - if (!r) { - r = amdgpu_ib_ring_tests(adev); - - 
amdgpu_amdkfd_post_reset(adev); - } + r = amdgpu_ib_ring_tests(adev); + if (r) + return r; -error: - if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { amdgpu_inc_vram_lost(adev); r = amdgpu_device_recover_vram(adev); } - amdgpu_virt_release_full_gpu(adev, true); + if (r) + return r; - if (AMDGPU_RETRY_SRIOV_RESET(r)) { - if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) { - retry_limit++; - goto retry; - } else - DRM_ERROR("GPU reset retry is beyond the retry limit\n"); - } + /* need to be called during full access so we can't do it later like + * bare-metal does. + */ + amdgpu_amdkfd_post_reset(adev); + amdgpu_virt_release_full_gpu(adev, true); - return r; + /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) + amdgpu_ras_resume(adev); + return 0; } /** @@ -5115,11 +5226,14 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) dev_info(adev->dev, "GPU mode1 reset\n"); + /* Cache the state before bus master disable. The saved config space + * values are used in other cases like restore after mode-2 reset. + */ + amdgpu_device_cache_pci_state(adev->pdev); + /* disable BM */ pci_clear_master(adev->pdev); - amdgpu_device_cache_pci_state(adev->pdev); - if (amdgpu_dpm_is_mode1_reset_supported(adev)) { dev_info(adev->dev, "GPU smu mode1 reset\n"); ret = amdgpu_dpm_mode1_reset(adev); @@ -5257,11 +5371,23 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; + uint32_t i; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - amdgpu_reset_reg_dumps(tmp_adev); + + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { + amdgpu_reset_reg_dumps(tmp_adev); + + dev_info(tmp_adev->dev, "Dumping IP State\n"); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)tmp_adev); + dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); + } reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); @@ -5334,7 +5460,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_coredump(tmp_adev, vram_lost, reset_context); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); @@ -5532,6 +5659,23 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) } +static int amdgpu_device_health_check(struct list_head *device_list_handle) +{ + struct amdgpu_device *tmp_adev; + int ret = 0; + u32 status; + + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status); + if (PCI_POSSIBLE_ERROR(status)) { + dev_err(tmp_adev->dev, "device lost from bus!"); + ret = -ENODEV; + } + } + + return ret; +} + /** * amdgpu_device_gpu_recover - reset the asic and recover 
scheduler * @@ -5555,6 +5699,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int i, r = 0; bool need_emergency_restart = false; bool audio_suspended = false; + int retry_limit = AMDGPU_MAX_RETRY_LIMIT; /* * Special case: RAS triggered and full reset isn't supported @@ -5589,7 +5734,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * to put adev in the 1st position. */ INIT_LIST_HEAD(&device_list); - if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { list_add_tail(&tmp_adev->reset_list, &device_list); if (adev->shutdown) @@ -5603,6 +5748,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + if (!amdgpu_sriov_vf(adev)) { + r = amdgpu_device_health_check(device_list_handle); + if (r) + goto end_reset; + } + /* We need to lock reset domain only once both for XGMI and single device */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); @@ -5630,8 +5781,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, cancel_delayed_work_sync(&tmp_adev->delayed_init_work); - if (!amdgpu_sriov_vf(tmp_adev)) - amdgpu_amdkfd_pre_reset(tmp_adev); + amdgpu_amdkfd_pre_reset(tmp_adev, reset_context); /* * Mark these ASICs to be reseted as untracked first @@ -5684,33 +5834,40 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ r, adev_to_drm(tmp_adev)->unique); tmp_adev->asic_reset_res = r; } - - /* - * Drop all pending non scheduler resets. Scheduler resets - * were already dropped during drm_sched_stop - */ - amdgpu_device_stop_pending_resets(tmp_adev); } /* Actual ASIC resets if needed.*/ /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { - r = amdgpu_device_reset_sriov(adev, job ? false : true); + if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) { + dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n"); + amdgpu_ras_set_fed(adev, true); + set_bit(AMDGPU_HOST_FLR, &reset_context->flags); + } + + r = amdgpu_device_reset_sriov(adev, reset_context); + if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) { + amdgpu_virt_release_full_gpu(adev, true); + goto retry; + } if (r) adev->asic_reset_res = r; - - /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) - amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); if (r && r == -EAGAIN) goto retry; } + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + /* + * Drop any pending non scheduler resets queued before reset is done. + * Any reset scheduled after this point would be valid. Scheduler resets + * were already dropped during drm_sched_stop and no new ones can come + * in before drm_sched_start. 
+ */ + amdgpu_device_stop_pending_resets(tmp_adev); + } + skip_hw_reset: /* Post ASIC reset for all devs .*/ @@ -5768,6 +5925,7 @@ skip_sched_resume: reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +end_reset: if (hive) { mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); @@ -5803,13 +5961,18 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - while ((parent = pci_upstream_bridge(parent))) { - /* skip upstream/downstream switches internal to dGPU*/ - if (parent->vendor == PCI_VENDOR_ID_ATI) - continue; - *speed = pcie_get_speed_cap(parent); - *width = pcie_get_width_cap(parent); - break; + if (amdgpu_device_pcie_dynamic_switching_supported(adev)) { + while ((parent = pci_upstream_bridge(parent))) { + /* skip upstream/downstream switches internal to dGPU*/ + if (parent->vendor == PCI_VENDOR_ID_ATI) + continue; + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + break; + } + } else { + /* use the current speeds rather than max if switching is not supported */ + pcie_bandwidth_available(adev->pdev, NULL, speed, width); } } @@ -6024,7 +6187,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev) adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); - if (amdgpu_passthrough(adev) && + if (amdgpu_passthrough(adev) && adev->nbio.funcs && adev->nbio.funcs->clear_doorbell_interrupt) adev->nbio.funcs->clear_doorbell_interrupt(adev); @@ -6124,19 +6287,11 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) struct amdgpu_reset_context reset_context; u32 memsize; struct list_head device_list; - struct amdgpu_hive_info *hive; - int hive_ras_recovery = 0; - struct amdgpu_ras *ras; /* PCI error slot reset should be skipped During RAS recovery */ - hive = amdgpu_get_xgmi_hive(adev); - if (hive) { - hive_ras_recovery = atomic_read(&hive->ras_recovery); - amdgpu_put_xgmi_hive(hive); - } - ras = amdgpu_ras_get_context(adev); - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && - ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) && + amdgpu_ras_in_recovery(adev)) return PCI_ERS_RESULT_RECOVERED; DRM_INFO("PCI error: slot reset callback!!\n"); @@ -6379,6 +6534,22 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, } /** + * amdgpu_device_get_gang - return a reference to the current gang + * @adev: amdgpu_device pointer + * + * Returns: A new reference to the current gang leader. 
+ */ +struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev) +{ + struct dma_fence *fence; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&adev->gang_submit); + rcu_read_unlock(); + return fence; +} + +/** * amdgpu_device_switch_gang - switch to a new gang * @adev: amdgpu_device pointer * @gang: the gang to switch to @@ -6394,10 +6565,7 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, do { dma_fence_put(old); - rcu_read_lock(); - old = dma_fence_get_rcu_safe(&adev->gang_submit); - rcu_read_unlock(); - + old = amdgpu_device_get_gang(adev); if (old == gang) break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 1538b2dbfff1..eb605e79ae0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -33,6 +33,7 @@ struct amdgpu_df_hash_status { struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); void (*sw_fini)(struct amdgpu_device *adev); + void (*hw_init)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a07e4b87d4ca..ac108fca64fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -37,6 +37,7 @@ #include "df_v3_6.h" #include "df_v4_3.h" #include "df_v4_6_2.h" +#include "df_v4_15.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" @@ -55,6 +56,7 @@ #include "smuio_v9_0.h" #include "gmc_v10_0.h" #include "gmc_v11_0.h" +#include "gmc_v12_0.h" #include "gfxhub_v2_0.h" #include "mmhub_v2_0.h" #include "nbio_v2_3.h" @@ -68,15 +70,18 @@ #include "hdp_v7_0.h" #include "nv.h" #include "soc21.h" +#include "soc24.h" #include "navi10_ih.h" #include "ih_v6_0.h" #include "ih_v6_1.h" #include "ih_v7_0.h" #include "gfx_v10_0.h" #include "gfx_v11_0.h" +#include "gfx_v12_0.h" #include "sdma_v5_0.h" #include "sdma_v5_2.h" #include "sdma_v6_0.h" +#include "sdma_v7_0.h" #include "lsdma_v6_0.h" #include "lsdma_v7_0.h" #include "vcn_v2_0.h" @@ -90,17 +95,21 @@ #include "vcn_v4_0_5.h" #include "jpeg_v4_0_5.h" #include "amdgpu_vkms.h" -#include "mes_v10_1.h" #include "mes_v11_0.h" +#include "mes_v12_0.h" #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" #include "smuio_v13_0.h" #include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" +#include "smuio_v14_0_2.h" #include "vcn_v5_0_0.h" #include "jpeg_v5_0_0.h" #include "amdgpu_vpe.h" +#if defined(CONFIG_DRM_AMD_ISP) +#include "amdgpu_isp.h" +#endif #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); @@ -220,6 +229,7 @@ static int hw_id_map[MAX_HWIP] = { [DCI_HWIP] = DCI_HWID, [PCIE_HWIP] = PCIE_HWID, [VPE_HWIP] = VPE_HWID, + [ISP_HWIP] = ISP_HWID, }; static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary) @@ -245,6 +255,9 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, return -ENOENT; } +#define IP_DISCOVERY_V2 2 +#define IP_DISCOVERY_V4 4 + static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { @@ -252,21 +265,23 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, u32 msg; int i, ret = 0; - /* It can take up to a second for IFWI init to complete on some dGPUs, - * but generally it should be in the 60-100ms range. 
Normally this starts - * as soon as the device gets power so by the time the OS loads this has long - * completed. However, when a card is hotplugged via e.g., USB4, we need to - * wait for this to complete. Once the C2PMSG is updated, we can - * continue. - */ - if (dev_is_removable(&adev->pdev->dev)) { + if (!amdgpu_sriov_vf(adev)) { + /* It can take up to a second for IFWI init to complete on some dGPUs, + * but generally it should be in the 60-100ms range. Normally this starts + * as soon as the device gets power so by the time the OS loads this has long + * completed. However, when a card is hotplugged via e.g., USB4, we need to + * wait for this to complete. Once the C2PMSG is updated, we can + * continue. + */ + for (i = 0; i < 1000; i++) { msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - msleep(1); + usleep_range(1000, 1100); } } + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { @@ -358,6 +373,35 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev) } } +static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, + struct binary_header *bhdr) +{ + struct table_info *info; + uint16_t checksum; + uint16_t offset; + + info = &bhdr->table_list[NPS_INFO]; + offset = le16_to_cpu(info->offset); + checksum = le16_to_cpu(info->checksum); + + struct nps_info_header *nhdr = + (struct nps_info_header *)(adev->mman.discovery_bin + offset); + + if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) { + dev_dbg(adev->dev, "invalid ip discovery nps info table id\n"); + return -EINVAL; + } + + if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, + le32_to_cpu(nhdr->size_bytes), + checksum)) { + dev_dbg(adev->dev, "invalid nps info data table checksum\n"); + return -EINVAL; + } + + return 0; +} + static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; @@ -672,6 +716,12 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev, adev->sdma.sdma_mask &= ~(1U << harvest_info->list[i].number_instance); break; +#if defined(CONFIG_DRM_AMD_ISP) + case ISP_HWID: + adev->isp.harvest_config |= + ~(1U << harvest_info->list[i].number_instance); + break; +#endif default: break; } @@ -1417,7 +1467,8 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) * harvest configuration. 
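	 *
	 * For illustration, the net effect of an entry parsed from the
	 * harvest table earlier in this file is a cleared bit in the
	 * per-IP instance mask, e.g. for SDMA:
	 *
	 *   adev->sdma.sdma_mask &= ~(1U << harvest_info->list[i].number_instance);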
*/ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) { + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) { if ((adev->pdev->device == 0x731E && (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || @@ -1591,7 +1642,7 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) break; case 2: mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc); - adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc; + adev->gmc.mall_size = (uint64_t)mall_size_per_umc * adev->gmc.num_umc; break; default: dev_err(adev->dev, @@ -1657,6 +1708,69 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) return 0; } +union nps_info { + struct nps_info_v1_0 v1; +}; + +int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, + uint32_t *nps_type, + struct amdgpu_gmc_memrange **ranges, + int *range_cnt) +{ + struct amdgpu_gmc_memrange *mem_ranges; + struct binary_header *bhdr; + union nps_info *nps_info; + u16 offset; + int i; + + if (!nps_type || !range_cnt || !ranges) + return -EINVAL; + + if (!adev->mman.discovery_bin) { + dev_err(adev->dev, + "fetch mem range failed, ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset); + + if (!offset) + return -ENOENT; + + /* If verification fails, return as if NPS table doesn't exist */ + if (amdgpu_discovery_verify_npsinfo(adev, bhdr)) + return -ENOENT; + + nps_info = (union nps_info *)(adev->mman.discovery_bin + offset); + + switch (le16_to_cpu(nps_info->v1.header.version_major)) { + case 1: + *nps_type = nps_info->v1.nps_type; + *range_cnt = nps_info->v1.count; + mem_ranges = kvzalloc( + *range_cnt * sizeof(struct amdgpu_gmc_memrange), + GFP_KERNEL); + for (i = 0; i < *range_cnt; i++) { + mem_ranges[i].base_address = + nps_info->v1.instance_info[i].base_address; + mem_ranges[i].limit_address = + nps_info->v1.instance_info[i].limit_address; + mem_ranges[i].nid_mask = -1; + mem_ranges[i].flags = 0; + } + *ranges = mem_ranges; + break; + default: + dev_err(adev->dev, "Unhandled NPS info table %d.%d\n", + le16_to_cpu(nps_info->v1.header.version_major), + le16_to_cpu(nps_info->v1.header.version_minor)); + return -EINVAL; + } + + return 0; +} + static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) { /* what IP to use for this? 
*/ @@ -1670,6 +1784,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); break; case IP_VERSION(10, 1, 10): @@ -1694,8 +1809,13 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + amdgpu_device_ip_block_add(adev, &soc24_common_ip_block); + break; default: dev_err(adev->dev, "Failed to add common ip block(GC_HWIP:0x%x)\n", @@ -1718,6 +1838,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); break; case IP_VERSION(10, 1, 10): @@ -1742,8 +1863,13 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add gmc ip block(GC_HWIP:0x%x)\n", amdgpu_ip_version(adev, GC_HWIP, 0)); @@ -1766,6 +1892,7 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 2, 1): case IP_VERSION(4, 4, 0): case IP_VERSION(4, 4, 2): + case IP_VERSION(4, 4, 5): amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); break; case IP_VERSION(5, 0, 0): @@ -1835,8 +1962,10 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): + case IP_VERSION(13, 0, 14): case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 4): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): @@ -1893,9 +2022,14 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): + case IP_VERSION(13, 0, 14): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + case IP_VERSION(14, 0, 4): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; default: @@ -1947,6 +2081,11 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 2, 1): case IP_VERSION(3, 5, 0): case IP_VERSION(3, 5, 1): + case IP_VERSION(4, 1, 0): + /* TODO: Fix IP version. 
DC code expects version 4.0.1 */ + if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0)) + adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1); + if (amdgpu_sriov_vf(adev)) amdgpu_discovery_set_sriov_display(adev); else @@ -1993,6 +2132,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); break; case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); break; case IP_VERSION(10, 1, 10): @@ -2017,8 +2157,13 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n", amdgpu_ip_version(adev, GC_HWIP, 0)); @@ -2041,6 +2186,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); break; case IP_VERSION(4, 4, 2): + case IP_VERSION(4, 4, 5): amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block); break; case IP_VERSION(5, 0, 0): @@ -2065,8 +2211,13 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 3): case IP_VERSION(6, 1, 0): case IP_VERSION(6, 1, 1): + case IP_VERSION(6, 1, 2): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n", @@ -2172,25 +2323,6 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(10, 1, 10): - case IP_VERSION(10, 1, 1): - case IP_VERSION(10, 1, 2): - case IP_VERSION(10, 1, 3): - case IP_VERSION(10, 1, 4): - case IP_VERSION(10, 3, 0): - case IP_VERSION(10, 3, 1): - case IP_VERSION(10, 3, 2): - case IP_VERSION(10, 3, 3): - case IP_VERSION(10, 3, 4): - case IP_VERSION(10, 3, 5): - case IP_VERSION(10, 3, 6): - if (amdgpu_mes) { - amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); - adev->enable_mes = true; - if (amdgpu_mes_kiq) - adev->enable_mes_kiq = true; - } - break; case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): @@ -2198,10 +2330,19 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; break; + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + amdgpu_device_ip_block_add(adev, &mes_v12_0_ip_block); + adev->enable_mes = true; + adev->enable_mes_kiq = true; + if (amdgpu_uni_mes) + adev->enable_uni_mes = true; + break; default: break; } @@ -2212,6 +2353,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): aqua_vanjaram_init_soc_config(adev); break; default: @@ -2224,6 +2366,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev) switch 
(amdgpu_ip_version(adev, VPE_HWIP, 0)) { case IP_VERSION(6, 1, 0): case IP_VERSION(6, 1, 1): + case IP_VERSION(6, 1, 3): amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block); break; default: @@ -2237,6 +2380,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; @@ -2249,6 +2393,24 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) return 0; } +static int amdgpu_discovery_set_isp_ip_blocks(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DRM_AMD_ISP) + switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + amdgpu_device_ip_block_add(adev, &isp_v4_1_0_ip_block); + break; + case IP_VERSION(4, 1, 1): + amdgpu_device_ip_block_add(adev, &isp_v4_1_1_ip_block); + break; + default: + break; + } +#endif + + return 0; +} + int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) { int r; @@ -2436,6 +2598,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): adev->family = AMDGPU_FAMILY_AI; break; case IP_VERSION(9, 1, 0): @@ -2478,8 +2641,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + adev->family = AMDGPU_FAMILY_GC_12_0_0; + break; default: return -EINVAL; } @@ -2498,6 +2666,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): adev->flags |= AMD_IS_APU; break; default: @@ -2507,7 +2676,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0)) adev->gmc.xgmi.supported = true; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); /* set NBIO version */ @@ -2535,6 +2705,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 11, 0): case IP_VERSION(7, 11, 1): + case IP_VERSION(7, 11, 3): adev->nbio.funcs = &nbio_v7_11_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg; break; @@ -2588,6 +2759,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 2, 1): case IP_VERSION(4, 4, 0): case IP_VERSION(4, 4, 2): + case IP_VERSION(4, 4, 5): adev->hdp.funcs = &hdp_v4_0_funcs; break; case IP_VERSION(5, 0, 0): @@ -2632,6 +2804,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 6, 2): adev->df.funcs = &df_v4_6_2_funcs; break; + case IP_VERSION(4, 15, 0): + case IP_VERSION(4, 15, 1): + adev->df.funcs = &df_v4_15_funcs; + break; default: break; } @@ -2676,6 +2852,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(14, 0, 1): adev->smuio.funcs = &smuio_v13_0_6_funcs; break; + case IP_VERSION(14, 0, 2): + adev->smuio.funcs = &smuio_v14_0_2_funcs; + break; default: break; } @@ -2765,6 +2944,9 @@ int amdgpu_discovery_set_ip_blocks(struct 
amdgpu_device *adev) if (r) return r; + r = amdgpu_discovery_set_isp_ip_blocks(adev); + if (r) + return r; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 4d03cd5b3410..f5d36525ec3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -30,4 +30,9 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); +int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, + uint32_t *nps_type, + struct amdgpu_gmc_memrange **ranges, + int *range_cnt); + #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 3ecc7ef95172..092ec11258cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -654,6 +654,10 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier) if (!IS_AMD_FMT_MOD(modifier)) return NULL; + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX9 || + AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) + return NULL; + if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) return lookup_format_info(dcc_retile_formats, ARRAY_SIZE(dcc_retile_formats), @@ -718,6 +722,30 @@ extract_render_dcc_offset(struct amdgpu_device *adev, return 0; } +static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb) +{ + u64 modifier = 0; + int swizzle_mode = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE); + + if (!swizzle_mode) { + modifier = DRM_FORMAT_MOD_LINEAR; + } else { + int max_comp_block = + AMDGPU_TILING_GET(afb->tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); + + modifier = + AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12) | + AMD_FMT_MOD_SET(TILE, swizzle_mode) | + AMD_FMT_MOD_SET(DCC, afb->gfx12_dcc) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block); + } + + afb->base.modifier = modifier; + afb->base.flags |= DRM_MODE_FB_MODIFIERS; + return 0; +} + static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) { struct amdgpu_device *adev = drm_to_adev(afb->base.dev); @@ -917,8 +945,7 @@ static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb) { u64 micro_tile_mode; - /* Zero swizzle mode means linear */ - if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0) + if (AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) == 1) /* LINEAR_ALIGNED */ return 0; micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE); @@ -1042,6 +1069,30 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) block_width = 256 / format_info->cpp[i]; block_height = 1; block_size_log2 = 8; + } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) { + int swizzle = AMD_FMT_MOD_GET(TILE, modifier); + + switch (swizzle) { + case AMD_FMT_MOD_TILE_GFX12_256B_2D: + block_size_log2 = 8; + break; + case AMD_FMT_MOD_TILE_GFX12_4K_2D: + block_size_log2 = 12; + break; + case AMD_FMT_MOD_TILE_GFX12_64K_2D: + block_size_log2 = 16; + break; + case AMD_FMT_MOD_TILE_GFX12_256K_2D: + block_size_log2 = 18; + break; + default: + drm_dbg_kms(rfb->base.dev, + "Gfx12 swizzle mode with unknown block size: %d\n", swizzle); + return -EINVAL; + } + + get_block_dimensions(block_size_log2, format_info->cpp[i], + &block_width, &block_height); } else { int swizzle = AMD_FMT_MOD_GET(TILE, modifier); @@ -1077,7 +1128,8 @@ static int 
amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) return ret; } - if (AMD_FMT_MOD_GET(DCC, modifier)) { + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 && + AMD_FMT_MOD_GET(DCC, modifier)) { if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) { block_size_log2 = get_dcc_block_size(modifier, false, false); get_block_dimensions(block_size_log2 + 8, format_info->cpp[0], @@ -1107,7 +1159,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) } static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, - uint64_t *tiling_flags, bool *tmz_surface) + uint64_t *tiling_flags, bool *tmz_surface, + bool *gfx12_dcc) { struct amdgpu_bo *rbo; int r; @@ -1115,6 +1168,7 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb if (!amdgpu_fb) { *tiling_flags = 0; *tmz_surface = false; + *gfx12_dcc = false; return 0; } @@ -1128,11 +1182,9 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb return r; } - if (tiling_flags) - amdgpu_bo_get_tiling_flags(rbo, tiling_flags); - - if (tmz_surface) - *tmz_surface = amdgpu_bo_encrypted(rbo); + amdgpu_bo_get_tiling_flags(rbo, tiling_flags); + *tmz_surface = amdgpu_bo_encrypted(rbo); + *gfx12_dcc = rbo->flags & AMDGPU_GEM_CREATE_GFX12_DCC; amdgpu_bo_unreserve(rbo); @@ -1201,7 +1253,8 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, } } - ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface); + ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface, + &rfb->gfx12_dcc); if (ret) return ret; @@ -1215,7 +1268,11 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, if (!dev->mode_config.fb_modifiers_not_supported && !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) { - ret = convert_tiling_flags_to_modifier(rfb); + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) + ret = convert_tiling_flags_to_modifier_gfx12(rfb); + else + ret = convert_tiling_flags_to_modifier(rfb); + if (ret) { drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier", rfb->tiling_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 055ba2ea4c12..8e81a83d37d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -41,8 +41,6 @@ #include <linux/dma-buf.h> #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> -#include <linux/pm_runtime.h> -#include "amdgpu_trace.h" /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation @@ -58,42 +56,11 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - int r; if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(1, __func__); - if (r < 0) - goto out; - return 0; - -out: - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); - return r; -} - -/** - * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation - * - * @dmabuf: DMA-buf where we remove the attachment from - * @attach: the attachment to remove - * - * Called when an attachment is removed from the DMA-buf. 
- */ -static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, - struct dma_buf_attachment *attach) -{ - struct drm_gem_object *obj = dmabuf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); } /** @@ -165,8 +132,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, if (r) return ERR_PTR(r); - } else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) & - AMDGPU_GEM_DOMAIN_GTT)) { + } else if (bo->tbo.resource->mem_type != TTM_PL_TT) { return ERR_PTR(-EBUSY); } @@ -267,7 +233,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_dma_buf_attach, - .detach = amdgpu_dma_buf_detach, .pin = amdgpu_dma_buf_pin, .unpin = amdgpu_dma_buf_unpin, .map_dma_buf = amdgpu_dma_buf_map, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 80b9642f2bc4..094498a0964b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -24,7 +24,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> -#include <drm/drm_fbdev_generic.h> +#include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem.h> #include <drm/drm_managed.h> #include <drm/drm_pciids.h> @@ -116,9 +116,10 @@ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ + * - 3.58.0 - Add GFX12 DCC support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 57 +#define KMS_DRIVER_MINOR 58 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -129,6 +130,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), + AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -195,7 +197,9 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; +int amdgpu_mes_log_enable = 0; int amdgpu_mes_kiq; +int amdgpu_uni_mes = 1; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ @@ -213,6 +217,7 @@ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ +int amdgpu_umsch_mm_fwlog; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -668,6 +673,15 @@ MODULE_PARM_DESC(mes, module_param_named(mes, amdgpu_mes, int, 0444); /** + * DOC: mes_log_enable (int) + * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes_log_enable, + "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)"); +module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); + +/** * DOC: mes_kiq (int) * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. * (0 = disabled (default), 1 = enabled) @@ -677,6 +691,15 @@ MODULE_PARM_DESC(mes_kiq, module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444); /** + * DOC: uni_mes (int) + * Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler. 
+ * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(uni_mes, + "Enable Unified Micro Engine Scheduler (0 = disabled, 1 = enabled(default)"); +module_param_named(uni_mes, amdgpu_uni_mes, int, 0444); + +/** * DOC: noretry (int) * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that * do not support per-process XNACK this also disables retry page faults. @@ -915,7 +938,7 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); -module_param_named(reset_method, amdgpu_reset_method, int, 0444); +module_param_named(reset_method, amdgpu_reset_method, int, 0644); /** * DOC: bad_page_threshold (int) Bad page threshold is specifies the @@ -956,6 +979,13 @@ MODULE_PARM_DESC(umsch_mm, module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444); /** + * DOC: umsch_mm_fwlog (int) + * Enable umschfw log output for debugging, the default is disabled. + */ +MODULE_PARM_DESC(umsch_mm_fwlog, "Enable umschfw log(0 = disable (default value), 1 = enable)"); +module_param_named(umsch_mm_fwlog, amdgpu_umsch_mm_fwlog, int, 0444); + +/** * DOC: smu_pptable_id (int) * Used to override pptable id. id = 0 use VBIOS pptable. * id > 0 use the soft pptable with specicfied id. @@ -2164,6 +2194,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: place fw in vram for frontdoor loading\n"); adev->debug_use_vram_fw_buf = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_RAS_ACA) { + pr_info("debug: enable RAS ACA\n"); + adev->debug_enable_ras_aca = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2308,9 +2343,9 @@ retry_init: !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) { /* select 8 bpp console on low vram cards */ if (adev->gmc.real_vram_size <= (32*1024*1024)) - drm_fbdev_generic_setup(adev_to_drm(adev), 8); + drm_fbdev_ttm_setup(adev_to_drm(adev), 8); else - drm_fbdev_generic_setup(adev_to_drm(adev), 32); + drm_fbdev_ttm_setup(adev_to_drm(adev), 32); } ret = amdgpu_debugfs_init(adev); @@ -2471,6 +2506,7 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) /* Use a common context, just need to make sure full reset is done */ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); r = amdgpu_do_asic_reset(&device_list, &reset_context); if (r) { @@ -2734,7 +2770,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* nothing to do */ - } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_enter(drm_dev); } @@ -2774,7 +2811,8 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) * PCI core handles it for _PR3. 
*/ pci_set_master(pdev); - } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c index e71768661ca8..35fee3e8cde2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -90,7 +90,7 @@ #define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF)) static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, - u8 *eeprom_buf, u16 buf_size, bool read) + u8 *eeprom_buf, u32 buf_size, bool read) { u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE]; struct i2c_msg msgs[] = { @@ -133,15 +133,15 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, * cycle begins. This is implied for the * "i2c_transfer()" abstraction. */ - len = min(EEPROM_PAGE_SIZE - (eeprom_addr & - EEPROM_PAGE_MASK), - (u32)buf_size); + len = min(EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK), + buf_size); } else { /* Reading from the EEPROM has no limitation * on the number of bytes read from the EEPROM * device--they are simply sequenced out. + * Keep in mind that i2c_msg.len is u16 type. */ - len = buf_size; + len = min(U16_MAX, buf_size); } msgs[1].len = len; msgs[1].buf = eeprom_buf; @@ -179,7 +179,7 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, * Returns the number of bytes read/written; -errno on error. */ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, - u8 *eeprom_buf, u16 buf_size, bool read) + u8 *eeprom_buf, u32 buf_size, bool read) { const struct i2c_adapter_quirks *quirks = i2c_adap->quirks; u16 limit; @@ -225,7 +225,7 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, u32 eeprom_addr, u8 *eeprom_buf, - u16 bytes) + u32 bytes) { return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes, true); @@ -233,7 +233,7 @@ int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap, u32 eeprom_addr, u8 *eeprom_buf, - u16 bytes) + u32 bytes) { return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h index 6935adb2be1f..8083b8253ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h @@ -28,10 +28,10 @@ int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, u32 eeprom_addr, u8 *eeprom_buf, - u16 bytes); + u32 bytes); int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap, u32 eeprom_addr, u8 *eeprom_buf, - u16 bytes); + u32 bytes); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 10832b470448..2f24a6aa13bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(1, __func__); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct 
dma_fence *old; @@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - trace_amdgpu_runpm_reference_dumps(0, __func__); } while (last_seq != seq); return true; @@ -980,7 +978,9 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; + reset_context.src = AMDGPU_RESET_SRC_USER; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index a08c148b13f9..ceb5163480f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -100,6 +100,7 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) *fru_addr = FRU_EEPROM_MADDR_6; return true; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): if (fru_addr) *fru_addr = FRU_EEPROM_MADDR_8; return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index c623e23049d1..256b95232de5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -34,6 +34,7 @@ #include <asm/set_memory.h> #endif #include "amdgpu.h" +#include "amdgpu_reset.h" #include <drm/drm_drv.h> #include <drm/ttm/ttm_tt.h> @@ -325,10 +326,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, page_base += AMDGPU_GPU_PAGE_SIZE; } } - mb(); - amdgpu_device_flush_hdp(adev, NULL); - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + amdgpu_gart_invalidate_tlb(adev); drm_dev_exit(idx); } @@ -408,7 +406,10 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev) return; mb(); - amdgpu_device_flush_hdp(adev, NULL); + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_device_flush_hdp(adev, NULL); + up_read(&adev->reset_domain->sem); + } for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 67c234bcf89f..aad2027e5c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -108,6 +108,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, memset(&bp, 0, sizeof(bp)); *obj = NULL; + flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; bp.size = size; bp.byte_align = alignment; @@ -174,7 +175,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, return -EPERM; if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && - abo->tbo.base.resv != vm->root.bo->tbo.base.resv) + !amdgpu_vm_is_bo_always_valid(vm, abo)) return -EPERM; r = amdgpu_bo_reserve(abo, false); @@ -334,6 +335,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | AMDGPU_GEM_CREATE_EXPLICIT_SYNC | AMDGPU_GEM_CREATE_ENCRYPTED | + AMDGPU_GEM_CREATE_GFX12_DCC | AMDGPU_GEM_CREATE_DISCARDABLE)) return -EINVAL; @@ -683,7 +685,7 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) if (flags & AMDGPU_VM_PAGE_WRITEABLE) pte_flag |= AMDGPU_PTE_WRITEABLE; if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT; + pte_flag |= 
AMDGPU_PTE_PRT_FLAG(adev); if (flags & AMDGPU_VM_PAGE_NOALLOC) pte_flag |= AMDGPU_PTE_NOALLOC; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 55d5508987ff..82452606ae6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -329,8 +329,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id) ring->eop_gpu_addr = kiq->eop_gpu_addr; ring->no_scheduler = true; - snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d", - xcc_id, ring->me, ring->pipe, ring->queue); + snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu", + (unsigned char)xcc_id, (unsigned char)ring->me, + (unsigned char)ring->pipe, (unsigned char)ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) @@ -505,9 +506,6 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; - struct amdgpu_hive_info *hive; - struct amdgpu_ras *ras; - int hive_ras_recovery = 0; int i, r = 0; int j; @@ -532,15 +530,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) * This is workaround: only skip kiq_ring test * during ras recovery in suspend stage for gfx9.4.3 */ - hive = amdgpu_get_xgmi_hive(adev); - if (hive) { - hive_ras_recovery = atomic_read(&hive->ras_recovery); - amdgpu_put_xgmi_hive(hive); - } - - ras = amdgpu_ras_get_context(adev); - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && - ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) { + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) && + amdgpu_ras_in_recovery(adev)) { spin_unlock(&kiq->ring_lock); return 0; } @@ -598,6 +590,44 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, return set_resource_bit; } +static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + uint64_t queue_mask = ~0ULL; + int r, i, j; + + amdgpu_device_flush_hdp(adev, NULL); + + if (!adev->enable_uni_mes) { + spin_lock(&kiq->ring_lock); + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size); + if (r) { + dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r); + spin_unlock(&kiq->ring_lock); + return r; + } + + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); + r = amdgpu_ring_test_helper(kiq_ring); + spin_unlock(&kiq->ring_lock); + if (r) + dev_err(adev->dev, "KIQ failed to set resources\n"); + } + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + r = amdgpu_mes_map_legacy_queue(adev, + &adev->gfx.compute_ring[j]); + if (r) { + dev_err(adev->dev, "failed to map compute queue\n"); + return r; + } + } + + return 0; +} + int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; @@ -605,6 +635,9 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) uint64_t queue_mask = 0; int r, i, j; + if (adev->enable_mes) + return amdgpu_gfx_mes_enable_kcq(adev, xcc_id); + if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) return -EINVAL; @@ -623,10 +656,11 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, 
i)); } - DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, - kiq_ring->queue); amdgpu_device_flush_hdp(adev, NULL); + DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, + kiq_ring->queue); + spin_lock(&kiq->ring_lock); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * adev->gfx.num_compute_rings + @@ -637,9 +671,6 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) return r; } - if (adev->enable_mes) - queue_mask = ~0ULL; - kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); for (i = 0; i < adev->gfx.num_compute_rings; i++) { j = i + xcc_id * adev->gfx.num_compute_rings; @@ -666,6 +697,20 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) amdgpu_device_flush_hdp(adev, NULL); + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + r = amdgpu_mes_map_legacy_queue(adev, + &adev->gfx.gfx_ring[j]); + if (r) { + DRM_ERROR("failed to map gfx queue\n"); + return r; + } + } + + return 0; + } + spin_lock(&kiq->ring_lock); /* No need to map kcq on the slave */ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { @@ -1206,7 +1251,8 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); break; default: - break; + dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id); + return; } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8fcf889ddce9..ddda94e49db4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -259,7 +259,6 @@ struct amdgpu_cu_info { struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -298,6 +297,7 @@ struct amdgpu_gfx_funcs { int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); + int (*get_xccs_per_xcp)(struct amdgpu_device *adev); }; struct sq_work { @@ -434,6 +434,11 @@ struct amdgpu_gfx { uint32_t num_xcc_per_xcp; struct mutex partition_mutex; bool mcbp; /* mid command buffer preemption */ + + /* IP reg dump */ + uint32_t *ip_dump_core; + uint32_t *ip_dump_compute_queues; + uint32_t *ip_dump_gfx_queues; }; struct amdgpu_gfx_ras_reg_entry { @@ -552,8 +557,6 @@ static inline const char *amdgpu_gfx_compute_mode_desc(int mode) default: return "UNKNOWN"; } - - return "UNKNOWN"; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index c7b44aeb671b..103a837ccc71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,6 +38,8 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index be4629cdac04..c02659025656 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -620,10 +620,8 @@ void 
amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, int r; if (!hub->sdma_invalidation_workaround || vmid || - !adev->mman.buffer_funcs_enabled || - !adev->ib_pool_ready || amdgpu_in_reset(adev) || + !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || !ring->sched.ready) { - /* * A GPU reset should flush all TLBs anyway, so no need to do * this while one is ongoing. @@ -684,12 +682,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - signed long r; + int r; uint32_t seq; - if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || - !down_read_trylock(&adev->reset_domain->sem)) { + /* + * A GPU reset should flush all TLBs anyway, so no need to do + * this while one is ongoing. + */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return 0; + if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) { if (adev->gmc.flush_tlb_needs_extra_type_2) adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, 2, all_hub, @@ -703,43 +706,44 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); - return 0; - } + r = 0; + } else { + /* 2 dwords flush + 8 dwords fence */ + ndw = kiq->pmf->invalidate_tlbs_size + 8; - /* 2 dwords flush + 8 dwords fence */ - ndw = kiq->pmf->invalidate_tlbs_size + 8; + if (adev->gmc.flush_tlb_needs_extra_type_2) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_2) - ndw += kiq->pmf->invalidate_tlbs_size; + if (adev->gmc.flush_tlb_needs_extra_type_0) + ndw += kiq->pmf->invalidate_tlbs_size; - if (adev->gmc.flush_tlb_needs_extra_type_0) - ndw += kiq->pmf->invalidate_tlbs_size; + spin_lock(&adev->gfx.kiq[inst].ring_lock); + r = amdgpu_ring_alloc(ring, ndw); + if (r) { + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + goto error_unlock_reset; + } + if (adev->gmc.flush_tlb_needs_extra_type_2) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); - spin_lock(&adev->gfx.kiq[inst].ring_lock); - amdgpu_ring_alloc(ring, ndw); - if (adev->gmc.flush_tlb_needs_extra_type_2) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); - if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + goto error_unlock_reset; + } - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); + amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - goto error_unlock_reset; - } - - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[inst].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - r = -ETIME; - goto error_unlock_reset; + if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + dev_err(adev->dev, "timeout waiting for kiq fence\n"); + r = -ETIME; + } } - r = 0; error_unlock_reset: up_read(&adev->reset_domain->sem); @@ -844,6 +848,7 @@ void 
amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { @@ -876,11 +881,11 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) struct amdgpu_gmc *gmc = &adev->gmc; uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) || - gc_ver == IP_VERSION(9, 3, 0) || gc_ver == IP_VERSION(9, 4, 0) || gc_ver == IP_VERSION(9, 4, 1) || gc_ver == IP_VERSION(9, 4, 2) || gc_ver == IP_VERSION(9, 4, 3) || + gc_ver == IP_VERSION(9, 4, 4) || gc_ver >= IP_VERSION(10, 3, 0)); if (!amdgpu_sriov_xnack_support(adev)) @@ -1015,7 +1020,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) flags |= AMDGPU_PTE_WRITEABLE; flags |= AMDGPU_PTE_SNOOPED; flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1)); - flags |= AMDGPU_PDE_PTE; + flags |= AMDGPU_PDE_PTE_FLAG(adev); /* The first n PDE0 entries are used as PTE, * pointing to vram @@ -1028,7 +1033,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) * pointing to a 4K system page */ flags = AMDGPU_PTE_VALID; - flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED; + flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0); /* Requires gart_ptb_gpu_pa to be 4K aligned */ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags); drm_dev_exit(idx); @@ -1147,8 +1152,6 @@ static ssize_t current_memory_partition_show( default: return sysfs_emit(buf, "UNKNOWN\n"); } - - return sysfs_emit(buf, "UNKNOWN\n"); } static DEVICE_ATTR_RO(current_memory_partition); @@ -1166,3 +1169,79 @@ void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev) { device_remove_file(adev->dev, &dev_attr_current_memory_partition); } + +int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges, + int exp_ranges) +{ + struct amdgpu_gmc_memrange *ranges; + int range_cnt, ret, i, j; + uint32_t nps_type; + + if (!mem_ranges) + return -EINVAL; + + ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges, + &range_cnt); + + if (ret) + return ret; + + /* TODO: For now, expect ranges and partition count to be the same. + * Adjust if there are holes expected in any NPS domain. 
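+	 *
+	 * Illustrative arithmetic applied below to turn one discovered
+	 * range into a partition (names as used in this function):
+	 *
+	 *   fpfn = (base_address  - vram_base_offset) >> AMDGPU_GPU_PAGE_SHIFT;
+	 *   lpfn = (limit_address - vram_base_offset) >> AMDGPU_GPU_PAGE_SHIFT;
+	 *   size =  limit_address - base_address + 1;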
+ */ + if (range_cnt != exp_ranges) { + dev_warn( + adev->dev, + "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d", + exp_ranges, nps_type, range_cnt); + ret = -EINVAL; + goto err; + } + + for (i = 0; i < exp_ranges; ++i) { + if (ranges[i].base_address >= ranges[i].limit_address) { + dev_warn( + adev->dev, + "Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx", + nps_type, i, ranges[i].base_address, + ranges[i].limit_address); + ret = -EINVAL; + goto err; + } + + /* Check for overlaps, not expecting any now */ + for (j = i - 1; j >= 0; j--) { + if (max(ranges[j].base_address, + ranges[i].base_address) <= + min(ranges[j].limit_address, + ranges[i].limit_address)) { + dev_warn( + adev->dev, + "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]", + ranges[j].base_address, + ranges[j].limit_address, + ranges[i].base_address, + ranges[i].limit_address); + ret = -EINVAL; + goto err; + } + } + + mem_ranges[i].range.fpfn = + (ranges[i].base_address - + adev->vm_manager.vram_base_offset) >> + AMDGPU_GPU_PAGE_SHIFT; + mem_ranges[i].range.lpfn = + (ranges[i].limit_address - + adev->vm_manager.vram_base_offset) >> + AMDGPU_GPU_PAGE_SHIFT; + mem_ranges[i].size = + ranges[i].limit_address - ranges[i].base_address + 1; + } + +err: + kfree(ranges); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 17f40ea1104b..febca3130497 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -199,6 +199,13 @@ struct amdgpu_mem_partition_info { #define INVALID_PFN -1 +struct amdgpu_gmc_memrange { + uint64_t base_address; + uint64_t limit_address; + uint32_t flags; + int nid_mask; +}; + enum amdgpu_gart_placement { AMDGPU_GART_PLACEMENT_BEST_FIT = 0, AMDGPU_GART_PLACEMENT_HIGH, @@ -439,4 +446,8 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev); void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev); +int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges, + int exp_ranges); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 44367f03316f..0760e70402ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -200,8 +200,6 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr) amdgpu_ttm_recover_gart(node->base.bo); } spin_unlock(&mgr->lock); - - amdgpu_gart_invalidate_tlb(adev); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 431ec72655ec..e36fede7f74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -202,20 +202,12 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); - /* Assuming 64MB takes maximum 1 second to fault page address */ - timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL); - timeout *= HMM_RANGE_DEFAULT_TIMEOUT; - timeout = jiffies + msecs_to_jiffies(timeout); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); retry: hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); if (unlikely(r)) { - schedule(); - /* - * FIXME: This timeout should encompass the retry from - * mmu_interval_read_retry() as well. 
- */ if (r == -EBUSY && !time_after(jiffies, timeout)) goto retry; goto out_free_pfns; @@ -247,6 +239,8 @@ out_free_pfns: out_free_range: kfree(hmm_range); + if (r == -EBUSY) + r = -EAGAIN; return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index d79cb13e1aa8..00d6211e0fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -279,7 +279,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev, return NULL; } -static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, +static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, u8 slave_addr, u8 addr, u8 *val) @@ -304,16 +304,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, out_buf[0] = addr; out_buf[1] = 0; - if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) { - *val = in_buf[0]; - DRM_DEBUG("val = 0x%02x\n", *val); - } else { - DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n", - addr, *val); + if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) { + DRM_DEBUG("i2c 0x%02x read failed\n", addr); + return -EIO; } + + *val = in_buf[0]; + DRM_DEBUG("val = 0x%02x\n", *val); + + return 0; } -static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, +static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, u8 slave_addr, u8 addr, u8 val) @@ -329,9 +331,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, out_buf[0] = addr; out_buf[1] = val; - if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) - DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", - addr, val); + if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) { + DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val); + return -EIO; + } + + return 0; } /* ddc router switching */ @@ -346,16 +351,18 @@ amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connecto if (!amdgpu_connector->router_bus) return; - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x3, &val); + 0x3, &val)) + return; val &= ~amdgpu_connector->router.ddc_mux_control_pin; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, 0x3, val); - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x1, &val); + 0x1, &val)) + return; val &= ~amdgpu_connector->router.ddc_mux_control_pin; val |= amdgpu_connector->router.ddc_mux_state; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, @@ -375,16 +382,18 @@ amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector if (!amdgpu_connector->router_bus) return; - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x3, &val); + 0x3, &val)) + return; val &= ~amdgpu_connector->router.cd_mux_control_pin; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, 0x3, val); - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x1, &val); + 0x1, &val)) + return; val &= ~amdgpu_connector->router.cd_mux_control_pin; val |= amdgpu_connector->router.cd_mux_state; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3d7fcdeaf8cf..b6a8bddada4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -290,18 +290,36 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_signaled((*id)->last_flush))) { struct dma_fence *tmp; - /* Don't use per engine and per process VMID at the same time */ - if (adev->vm_manager.concurrent_flush) - ring = NULL; - - /* to prevent one context starved by another context */ - (*id)->pd_gpu_addr = 0; - tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); - if (tmp) { + /* Wait for the gang to be assembled before using a + * reserved VMID or otherwise the gang could deadlock. + */ + tmp = amdgpu_device_get_gang(adev); + if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) { *id = NULL; - *fence = dma_fence_get(tmp); + *fence = tmp; return 0; } + dma_fence_put(tmp); + + /* Make sure the id is owned by the gang before proceeding */ + if (!job->gang_submit || + (*id)->owner != vm->immediate.fence_context) { + + /* Don't use per engine and per process VMID at the + * same time + */ + if (adev->vm_manager.concurrent_flush) + ring = NULL; + + /* to prevent one context starved by another context */ + (*id)->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); + if (tmp) { + *id = NULL; + *fence = dma_fence_get(tmp); + return 0; + } + } needs_flush = true; } @@ -406,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) { + if (amdgpu_vmid_uses_reserved(vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -456,6 +474,19 @@ error: return r; } +/* + * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @vm: the VM to check + * @vmhub: the VMHUB which will be used + * + * Returns: True if the VM will use a reserved VMID. 
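(The new amdgpu_vmid_uses_reserved() helper documented here folds together two conditions that previously sat inline in amdgpu_vmid_grab(): an explicit per-hub VMID reservation, or global isolation enforcement on the first GFX hub. A compact model of that predicate, with all type and constant names invented for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NUM_HUBS 2
#define GFXHUB_0 0	/* stands in for AMDGPU_GFXHUB(0) */

static bool enforce_isolation;	/* module-parameter-like global */

struct vm {
	bool reserved_vmid[NUM_HUBS];
};

static bool vmid_uses_reserved(const struct vm *vm, unsigned int vmhub)
{
	/* Either this VM reserved an ID on the hub, or isolation
	 * enforcement reserves the first GFX hub for everyone. */
	return vm->reserved_vmid[vmhub] ||
	       (enforce_isolation && vmhub == GFXHUB_0);
}

int main(void)
{
	struct vm vm = { .reserved_vmid = { false, true } };

	enforce_isolation = true;
	printf("hub0: %d hub1: %d\n",
	       vmid_uses_reserved(&vm, 0), vmid_uses_reserved(&vm, 1));
	return 0;
}

Centralizing the predicate lets the grab path and future callers agree on one definition of "reserved".)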
+ */ +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +{ + return vm->reserved_vmid[vmhub] || + (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); +} + int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index fa8c42c83d5d..240fa6751260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,6 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 7e6d09730e6d..19ce4da285e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -279,7 +279,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.msi_enabled = false; if (!amdgpu_msi_ok(adev)) - flags = PCI_IRQ_LEGACY; + flags = PCI_IRQ_INTX; else flags = PCI_IRQ_ALL_TYPES; @@ -445,6 +445,14 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, entry.ih = ih; entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; + + /* + * timestamp is not supported on some legacy SOCs (cik, cz, iceland, + * si and tonga), so initialize timestamp and timestamp_src to 0 + */ + entry.timestamp = 0; + entry.timestamp_src = 0; + amdgpu_ih_decode_iv(adev, &entry); trace_amdgpu_iv(ih - &adev->irq.ih, &entry); @@ -458,7 +466,8 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); - } else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) && + } else if (((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) || + (client_id == SOC15_IH_CLIENTID_ISP)) && adev->irq.virq[src_id]) { generic_handle_domain_irq(adev->irq.domain, src_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c new file mode 100644 index 000000000000..4766e99dd98f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include <linux/firmware.h> +#include <linux/mfd/core.h> + +#include "amdgpu.h" +#include "amdgpu_isp.h" +#include "isp_v4_1_0.h" +#include "isp_v4_1_1.h" + +static int isp_sw_init(void *handle) +{ + return 0; +} + +static int isp_sw_fini(void *handle) +{ + return 0; +} + +/** + * isp_hw_init - start and test isp block + * + * @handle: handle for amdgpu_device pointer + * + */ +static int isp_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_isp *isp = &adev->isp; + + const struct amdgpu_ip_block *ip_block = + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ISP); + + if (!ip_block) + return -EINVAL; + + if (isp->funcs->hw_init != NULL) + return isp->funcs->hw_init(isp); + + return -ENODEV; +} + +/** + * isp_hw_fini - stop the hardware block + * + * @handle: handle for amdgpu_device pointer + * + */ +static int isp_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_isp *isp = &adev->isp; + + if (isp->funcs->hw_fini != NULL) + return isp->funcs->hw_fini(isp); + + return -ENODEV; +} + +static int isp_suspend(void *handle) +{ + return 0; +} + +static int isp_resume(void *handle) +{ + return 0; +} + +static int isp_load_fw_by_psp(struct amdgpu_device *adev) +{ + const struct common_firmware_header *hdr; + char ucode_prefix[10]; + int r = 0; + + /* get isp fw binary name and path */ + amdgpu_ucode_ip_version_decode(adev, ISP_HWIP, ucode_prefix, + sizeof(ucode_prefix)); + + /* read isp fw */ + r = amdgpu_ucode_request(adev, &adev->isp.fw, "amdgpu/%s.bin", ucode_prefix); + if (r) { + amdgpu_ucode_release(&adev->isp.fw); + return r; + } + + hdr = (const struct common_firmware_header *)adev->isp.fw->data; + + adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].ucode_id = + AMDGPU_UCODE_ID_ISP; + adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].fw = adev->isp.fw; + + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + return r; +} + +static int isp_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_isp *isp = &adev->isp; + + switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + isp_v4_1_0_set_isp_funcs(isp); + break; + case IP_VERSION(4, 1, 1): + isp_v4_1_1_set_isp_funcs(isp); + break; + default: + return -EINVAL; + } + + isp->adev = adev; + isp->parent = adev->dev; + + if (isp_load_fw_by_psp(adev)) { + DRM_DEBUG_DRIVER("%s: isp fw load failed\n", __func__); + return -ENOENT; + } + + return 0; +} + +static bool isp_is_idle(void *handle) +{ + return true; +} + +static int isp_wait_for_idle(void *handle) +{ + return 0; +} + +static int isp_soft_reset(void *handle) +{ + return 0; +} + +static int isp_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int isp_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static const struct amd_ip_funcs isp_ip_funcs = { + .name = "isp_ip", + .early_init = isp_early_init, + .late_init = NULL, + .sw_init = isp_sw_init, + .sw_fini = isp_sw_fini, + .hw_init = isp_hw_init, + .hw_fini = isp_hw_fini, + .suspend = isp_suspend, + .resume = isp_resume, + .is_idle = isp_is_idle, + .wait_for_idle = isp_wait_for_idle, + .soft_reset = isp_soft_reset, + .set_clockgating_state = 
isp_set_clockgating_state, + .set_powergating_state = isp_set_powergating_state, +}; + +const struct amdgpu_ip_block_version isp_v4_1_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_ISP, + .major = 4, + .minor = 1, + .rev = 0, + .funcs = &isp_ip_funcs, +}; + +const struct amdgpu_ip_block_version isp_v4_1_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_ISP, + .major = 4, + .minor = 1, + .rev = 1, + .funcs = &isp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h new file mode 100644 index 000000000000..44e2ea8c9728 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ + +#ifndef __AMDGPU_ISP_H__ +#define __AMDGPU_ISP_H__ + +#define ISP_REGS_OFFSET_END 0x629A4 + +struct amdgpu_isp; + +struct isp_platform_data { + void *adev; + u32 asic_type; + resource_size_t base_rmmio_size; +}; + +struct isp_funcs { + int (*hw_init)(struct amdgpu_isp *isp); + int (*hw_fini)(struct amdgpu_isp *isp); +}; + +struct amdgpu_isp { + struct device *parent; + struct amdgpu_device *adev; + const struct isp_funcs *funcs; + struct mfd_cell *isp_cell; + struct resource *isp_res; + struct isp_platform_data *isp_pdata; + unsigned int harvest_config; + const struct firmware *fw; +}; + +extern const struct amdgpu_ip_block_version isp_v4_1_0_ip_block; +extern const struct amdgpu_ip_block_version isp_v4_1_1_ip_block; + +#endif /* __AMDGPU_ISP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4b3000c21ef2..e238f2832f65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -41,7 +41,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) int r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { - DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s", + dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s", __func__, s_job->sched->name); /* Effectively the job is aborted as the device is gone */ @@ -53,19 +53,20 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { - DRM_ERROR("ring %s timeout, but soft recovered\n", - s_job->sched->name); + dev_err(adev->dev, "ring %s timeout, but soft recovered\n", + s_job->sched->name); goto exit; } - DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", - job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); + dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n", + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); if (ti) { - DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", - ti->process_name, ti->tgid, ti->task_name, ti->pid); + dev_err(adev->dev, + "Process information: process %s pid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); amdgpu_vm_put_task_info(ti); } @@ -77,11 +78,12 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; + reset_context.src = AMDGPU_RESET_SRC_JOB; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) - DRM_ERROR("GPU Recovery Failed: %d\n", r); + dev_err(adev->dev, "GPU Recovery Failed: %d\n", r); } else { drm_sched_suspend_timeout(&ring->sched); if (amdgpu_sriov_vf(adev)) @@ -273,7 +275,7 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, while (!fence && job->vm && !job->vmid) { r = amdgpu_vmid_grab(job->vm, ring, job, &fence); if (r) { - DRM_ERROR("Error getting VM ID (%d)\n", r); + dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); goto error; } } @@ -304,12 +306,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { - DRM_INFO("Skip scheduling IBs!\n"); + dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)", + 
ring->name); } else { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, &fence); if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, + "Error scheduling IBs (%d) in ring(%s)", r, + ring->name); } job->job_run_counter++; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index aea31d61d991..f9cdd873ac9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -60,6 +60,37 @@ RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \ }) +#define WREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \ + do { \ + WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \ + regUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \ + regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \ + WREG32_SOC15( \ + JPEG, GET_INST(JPEG, inst_idx), \ + regUVD_DPG_LMA_CTL, \ + (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \ + indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + +#define RREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, mask_en) \ + do { \ + WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \ + regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \ + WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \ + regUVD_DPG_LMA_CTL, \ + (UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(JPEG, inst_idx, regUVD_DPG_LMA_DATA); \ + } while (0) + +#define ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, offset, value, indirect) \ + do { \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \ + } while (0) + struct amdgpu_jpeg_reg{ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a2df3025a754..66782be5917b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -149,38 +149,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } - adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; - if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_PX; - dev_info(adev->dev, "Using ATPX for runtime pm\n"); - } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; - dev_info(adev->dev, "Using BOCO for runtime pm\n"); - } else if (amdgpu_device_supports_baco(dev) && - (amdgpu_runtime_pm != 0)) { - switch (adev->asic_type) { - case CHIP_VEGA20: - case CHIP_ARCTURUS: - /* enable BACO as runpm mode if runpm=1 */ - if (amdgpu_runtime_pm > 0) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - case CHIP_VEGA10: - /* enable BACO as runpm mode if noretry=0 */ - if (!adev->gmc.noretry) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - default: - /* enable BACO as runpm mode on CI+ */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - } - - if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) - dev_info(adev->dev, "Using BACO for runtime pm\n"); - } + amdgpu_device_detect_runtime_pm_mode(adev); /* Call ACPI methods: require modeset init * but failure is not fatal @@ -649,30 +618,37 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; if (adev->xcp_mgr && adev->xcp_mgr->num_xcps > 0 && - fpriv->xcp_id >= 0 && fpriv->xcp_id < adev->xcp_mgr->num_xcps) { + fpriv->xcp_id < adev->xcp_mgr->num_xcps) { xcp = 
&adev->xcp_mgr->xcp[fpriv->xcp_id]; switch (type) { case AMD_IP_BLOCK_TYPE_GFX: ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask); + if (ret) + return ret; count = hweight32(inst_mask); break; case AMD_IP_BLOCK_TYPE_SDMA: ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_SDMA, &inst_mask); + if (ret) + return ret; count = hweight32(inst_mask); break; case AMD_IP_BLOCK_TYPE_JPEG: ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); + if (ret) + return ret; count = hweight32(inst_mask) * adev->jpeg.num_jpeg_rings; break; case AMD_IP_BLOCK_TYPE_VCN: ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); + if (ret) + return ret; count = hweight32(inst_mask); break; default: return -EINVAL; } - if (ret) - return ret; + return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 24ad4b97177b..2542bd7aa7c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -153,7 +153,7 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) return 0; } -void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) +static void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) { if (!mca_set) return; @@ -162,7 +162,7 @@ void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) INIT_LIST_HEAD(&mca_set->list); } -int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry) +static int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry) { struct mca_bank_node *node; @@ -183,107 +183,113 @@ int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_ return 0; } -void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set) +static int amdgpu_mca_bank_set_merge(struct mca_bank_set *mca_set, struct mca_bank_set *new) { - struct mca_bank_node *node, *tmp; + struct mca_bank_node *node; - list_for_each_entry_safe(node, tmp, &mca_set->list, node) { - list_del(&node->node); - kvfree(node); - } + list_for_each_entry(node, &new->list, node) + amdgpu_mca_bank_set_add_entry(mca_set, &node->entry); + + return 0; } -void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs) +static void amdgpu_mca_bank_set_remove_node(struct mca_bank_set *mca_set, struct mca_bank_node *node) { - struct amdgpu_mca *mca = &adev->mca; + if (!node) + return; - mca->mca_funcs = mca_funcs; + list_del(&node->node); + kvfree(node); + + mca_set->nr_entries--; } -int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) +static void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set) { - const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + struct mca_bank_node *node, *tmp; - if (mca_funcs && mca_funcs->mca_set_debug_mode) - return mca_funcs->mca_set_debug_mode(adev, enable); + if (list_empty(&mca_set->list)) + return; - return -EOPNOTSUPP; + list_for_each_entry_safe(node, tmp, &mca_set->list, node) + amdgpu_mca_bank_set_remove_node(mca_set, node); } -static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs) { - dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); - dev_info(adev->dev, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_STATUS]); - dev_info(adev->dev, 
HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_ADDR]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_MISC0]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_IPID]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_SYND]); + struct amdgpu_mca *mca = &adev->mca; + + mca->mca_funcs = mca_funcs; } -int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +int amdgpu_mca_init(struct amdgpu_device *adev) { - struct amdgpu_smuio_mcm_config_info mcm_info; - struct ras_err_addr err_addr = {0}; - struct mca_bank_set mca_set; - struct mca_bank_node *node; - struct mca_bank_entry *entry; - uint32_t count; - int ret, i = 0; + struct amdgpu_mca *mca = &adev->mca; + struct mca_bank_cache *mca_cache; + int i; - amdgpu_mca_bank_set_init(&mca_set); + atomic_set(&mca->ue_update_flag, 0); - ret = amdgpu_mca_smu_get_mca_set(adev, blk, type, &mca_set); - if (ret) - goto out_mca_release; + for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) { + mca_cache = &mca->mca_caches[i]; + mutex_init(&mca_cache->lock); + amdgpu_mca_bank_set_init(&mca_cache->mca_set); + } - list_for_each_entry(node, &mca_set.list, node) { - entry = &node->entry; + return 0; +} - amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); +void amdgpu_mca_fini(struct amdgpu_device *adev) +{ + struct amdgpu_mca *mca = &adev->mca; + struct mca_bank_cache *mca_cache; + int i; - count = 0; - ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); - if (ret) - goto out_mca_release; + atomic_set(&mca->ue_update_flag, 0); - if (!count) - continue; + for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) { + mca_cache = &mca->mca_caches[i]; + amdgpu_mca_bank_set_release(&mca_cache->mca_set); + mutex_destroy(&mca_cache->lock); + } +} - mcm_info.socket_id = entry->info.socket_id; - mcm_info.die_id = entry->info.aid; +int amdgpu_mca_reset(struct amdgpu_device *adev) +{ + amdgpu_mca_fini(adev); - if (blk == AMDGPU_RAS_BLOCK__UMC) { - err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; - err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; - err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; - } + return amdgpu_mca_init(adev); +} - if (type == AMDGPU_MCA_ERROR_TYPE_UE) - amdgpu_ras_error_statistic_ue_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); - else { - if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) - amdgpu_ras_error_statistic_de_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); - else - amdgpu_ras_error_statistic_ce_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); - } - } +int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; -out_mca_release: - amdgpu_mca_bank_set_release(&mca_set); + if (mca_funcs && mca_funcs->mca_set_debug_mode) + return mca_funcs->mca_set_debug_mode(adev, enable); - return ret; + return -EOPNOTSUPP; } +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry, + struct ras_query_context *qctx) +{ + u64 event_id = qctx ? 
qctx->evid.event_id : RAS_EVENT_INVALID_ID; + + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); +} -int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) +static int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; @@ -296,103 +302,200 @@ int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_m return -EOPNOTSUPP; } -int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *total) +static int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + int idx, struct mca_bank_entry *entry) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; - struct mca_bank_set mca_set; - struct mca_bank_node *node; - struct mca_bank_entry *entry; - uint32_t count; - int ret; + int count; + + if (!mca_funcs || !mca_funcs->mca_get_mca_entry) + return -EOPNOTSUPP; - if (!total) + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_UE: + count = mca_funcs->max_ue_count; + break; + case AMDGPU_MCA_ERROR_TYPE_CE: + count = mca_funcs->max_ce_count; + break; + default: return -EINVAL; + } - if (!mca_funcs) - return -EOPNOTSUPP; + if (idx >= count) + return -EINVAL; - if (!mca_funcs->mca_get_ras_mca_set || !mca_funcs->mca_get_valid_mca_count) - return -EOPNOTSUPP; + return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); +} - amdgpu_mca_bank_set_init(&mca_set); +static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgpu_mca_error_type type) +{ + struct amdgpu_mca *mca = &adev->mca; + bool ret = true; + + /* + * Because the UE Valid MCA count will only be cleared after reset, + * in order to avoid repeated counting of the error count, + * the aca bank is only updated once during the gpu recovery stage. 
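(The comment describes a do-once latch: during a recovery the first caller flips ue_update_flag from 0 to 1 and refreshes the UE banks, later callers see it already set and skip the update, and outside recovery the flag is re-armed so the next recovery updates again. A minimal sketch of the same latch with C11 atomics, where recovery_in_progress stands in for amdgpu_ras_intr_triggered():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int ue_update_flag;
static bool recovery_in_progress;

static bool should_update_ue_banks(void)
{
	if (recovery_in_progress) {
		int expected = 0;

		/* Only the first caller during a recovery wins;
		 * mirrors atomic_cmpxchg(&flag, 0, 1) == 0. */
		return atomic_compare_exchange_strong(&ue_update_flag,
						      &expected, 1);
	}
	/* Outside recovery, re-arm the latch and always update. */
	atomic_store(&ue_update_flag, 0);
	return true;
}

int main(void)
{
	recovery_in_progress = true;
	printf("%d %d\n", should_update_ue_banks(),	/* 1: first caller */
	       should_update_ue_banks());		/* 0: latched */
	return 0;
}

The cmpxchg makes the "first caller" decision race-free without any extra lock.)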
+ */ + if (type == AMDGPU_MCA_ERROR_TYPE_UE) { + if (amdgpu_ras_intr_triggered()) + ret = atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0; + else + atomic_set(&mca->ue_update_flag, 0); + } - ret = mca_funcs->mca_get_ras_mca_set(adev, blk, type, &mca_set); - if (ret) - goto err_mca_set_release; + return ret; +} - *total = 0; - list_for_each_entry(node, &mca_set.list, node) { - entry = &node->entry; +static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, + struct ras_query_context *qctx) +{ + struct mca_bank_entry entry; + uint32_t count = 0, i; + int ret; - count = 0; - ret = mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, &count); + if (!mca_set) + return -EINVAL; + + if (!amdgpu_mca_bank_should_update(adev, type)) + return 0; + + ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count); + if (ret) + return ret; + + for (i = 0; i < count; i++) { + memset(&entry, 0, sizeof(entry)); + ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, &entry); if (ret) - goto err_mca_set_release; + return ret; - *total += count; - } + amdgpu_mca_bank_set_add_entry(mca_set, &entry); -err_mca_set_release: - amdgpu_mca_bank_set_release(&mca_set); + amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx); + } - return ret; + return 0; } -int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + if (!count || !entry) return -EINVAL; if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count) return -EOPNOTSUPP; - return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count); } -int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) +static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_set *mca_set, struct ras_err_data *err_data) { - const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + struct ras_err_addr err_addr; + struct amdgpu_smuio_mcm_config_info mcm_info; + struct mca_bank_node *node, *tmp; + struct mca_bank_entry *entry; + uint32_t count; + int ret; if (!mca_set) return -EINVAL; - if (!mca_funcs || !mca_funcs->mca_get_ras_mca_set) - return -EOPNOTSUPP; + if (!mca_set->nr_entries) + return 0; + + list_for_each_entry_safe(node, tmp, &mca_set->list, node) { + entry = &node->entry; - WARN_ON(!list_empty(&mca_set->list)); + count = 0; + ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret && ret != -EOPNOTSUPP) + return ret; + + if (!count) + continue; + + memset(&mcm_info, 0, sizeof(mcm_info)); + memset(&err_addr, 0, sizeof(err_addr)); + + mcm_info.socket_id = entry->info.socket_id; + mcm_info.die_id = entry->info.aid; - return mca_funcs->mca_get_ras_mca_set(adev, blk, type, mca_set); + if (blk == AMDGPU_RAS_BLOCK__UMC) { + err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; + err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; + err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; + } + + if (type == AMDGPU_MCA_ERROR_TYPE_UE) { + amdgpu_ras_error_statistic_ue_count(err_data, + &mcm_info, &err_addr, 
(uint64_t)count); + } else { + if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) + amdgpu_ras_error_statistic_de_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + } + + amdgpu_mca_bank_set_remove_node(mca_set, node); + } + + return 0; } -int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, - int idx, struct mca_bank_entry *entry) +static int amdgpu_mca_add_mca_set_to_cache(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *new) { - const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; - int count; + struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type]; + int ret; - if (!mca_funcs || !mca_funcs->mca_get_mca_entry) - return -EOPNOTSUPP; + mutex_lock(&mca_cache->lock); + ret = amdgpu_mca_bank_set_merge(&mca_cache->mca_set, new); + mutex_unlock(&mca_cache->lock); - switch (type) { - case AMDGPU_MCA_ERROR_TYPE_UE: - count = mca_funcs->max_ue_count; - break; - case AMDGPU_MCA_ERROR_TYPE_CE: - count = mca_funcs->max_ce_count; - break; - default: - return -EINVAL; + return ret; +} + +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx) +{ + struct mca_bank_set mca_set; + struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type]; + int ret; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, qctx); + if (ret) + goto out_mca_release; + + ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_set, err_data); + if (ret) + goto out_mca_release; + + /* add remain mca bank to mca cache */ + if (mca_set.nr_entries) { + ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set); + if (ret) + goto out_mca_release; } - if (idx >= count) - return -EINVAL; + /* dispatch mca set again if mca cache has valid data */ + mutex_lock(&mca_cache->lock); + if (mca_cache->mca_set.nr_entries) + ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_cache->mca_set, err_data); + mutex_unlock(&mca_cache->lock); - return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); +out_mca_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; } #if defined(CONFIG_DEBUG_FS) @@ -433,36 +536,32 @@ static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry) static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct mca_bank_entry *entry; - uint32_t count = 0; - int i, ret; + struct mca_bank_node *node; + struct mca_bank_set mca_set; + struct ras_query_context qctx; + int ret; - ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count); + amdgpu_mca_bank_set_init(&mca_set); + + qctx.evid.event_id = RAS_EVENT_INVALID_ID; + ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx); if (ret) - return ret; + goto err_free_mca_set; seq_printf(m, "amdgpu smu %s valid mca count: %d\n", - type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", count); + type == AMDGPU_MCA_ERROR_TYPE_UE ? 
"UE" : "CE", mca_set.nr_entries); - if (!count) - return 0; + if (!mca_set.nr_entries) + goto err_free_mca_set; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; + list_for_each_entry(node, &mca_set.list, node) + mca_dump_entry(m, &node->entry); - for (i = 0; i < count; i++) { - memset(entry, 0, sizeof(*entry)); - - ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, entry); - if (ret) - goto err_free_entry; + /* add mca bank to mca bank cache */ + ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set); - mca_dump_entry(m, entry); - } - -err_free_entry: - kfree(entry); +err_free_mca_set: + amdgpu_mca_bank_set_release(&mca_set); return ret; } @@ -509,7 +608,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) { #if defined(CONFIG_DEBUG_FS) - if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6)) + if (!root) return; debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index b964110ed1e0..e80323ff90c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -77,11 +77,23 @@ struct amdgpu_mca_ras { struct amdgpu_mca_ras_block *ras; }; +struct mca_bank_set { + int nr_entries; + struct list_head list; +}; + +struct mca_bank_cache { + struct mca_bank_set mca_set; + struct mutex lock; +}; + struct amdgpu_mca { struct amdgpu_mca_ras mp0; struct amdgpu_mca_ras mp1; struct amdgpu_mca_ras mpio; const struct amdgpu_mca_smu_funcs *mca_funcs; + struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE]; + atomic_t ue_update_flag; }; enum mca_reg_idx { @@ -113,17 +125,10 @@ struct mca_bank_node { struct list_head node; }; -struct mca_bank_set { - int nr_entries; - struct list_head list; -}; - struct amdgpu_mca_smu_funcs { int max_ue_count; int max_ce_count; int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable); - int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, - struct mca_bank_set *mca_set); int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -151,24 +156,14 @@ int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev); int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); +int amdgpu_mca_init(struct amdgpu_device *adev); +void amdgpu_mca_fini(struct amdgpu_device *adev); +int amdgpu_mca_reset(struct amdgpu_device *adev); int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); -int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *total); -int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count); -int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); 
-int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set); -int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, - int idx, struct mca_bank_entry *entry); - void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); - -void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); -int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); -void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); -int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index a98e03e0a51f..e499d6ba306b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -40,7 +40,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) } static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, int ip_type, uint64_t *doorbell_index) { unsigned int offset, found; @@ -65,7 +64,6 @@ static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, } static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, uint32_t doorbell_index) { unsigned int old, rel_index; @@ -102,7 +100,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) { int r; - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + if (!amdgpu_mes_log_enable) + return 0; + + r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -143,7 +144,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { /* use only 1st MEC pipes */ - if (i >= 4) + if (i >= adev->gfx.mec.num_pipe_per_mec) continue; adev->mes.compute_hqd_mask[i] = 0xc; } @@ -653,7 +654,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, *queue_id = queue->queue_id = r; /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, + r = amdgpu_mes_kernel_doorbell_get(adev, qprops->queue_type, &qprops->doorbell_off); if (r) @@ -711,8 +712,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, return 0; clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - qprops->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); clean_up_queue_id: spin_lock_irqsave(&adev->mes.queue_id_lock, flags); idr_remove(&adev->mes.queue_id_idr, queue->queue_id); @@ -766,8 +766,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) queue_id); list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - queue->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); amdgpu_mes_unlock(&adev->mes); amdgpu_mes_queue_free_mqd(queue); @@ -775,6 +774,28 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + struct 
mes_map_legacy_queue_input queue_input; + int r; + + memset(&queue_input, 0, sizeof(queue_input)); + + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.wptr_addr = ring->wptr_gpu_addr; + + r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to map legacy queue\n"); + + return r; +} + int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, @@ -1129,6 +1150,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, return; amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); + del_timer_sync(&ring->fence_drv.fallback_timer); amdgpu_ring_fini(ring); kfree(ring); } @@ -1471,13 +1493,17 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) const struct mes_firmware_header_v1_0 *mes_hdr; struct amdgpu_firmware_info *info; char ucode_prefix[30]; - char fw_name[40]; + char fw_name[50]; bool need_retry = false; int r; amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { + if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + snprintf(fw_name, sizeof(fw_name), + "amdgpu/%s_uni_mes.bin", ucode_prefix); + } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", ucode_prefix, pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); @@ -1490,11 +1516,9 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", - ucode_prefix); - DRM_INFO("try to fall back to %s\n", fw_name); + dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], - fw_name); + "amdgpu/%s_mes.bin", ucode_prefix); } if (r) @@ -1549,12 +1573,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, PAGE_SIZE, false); + mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); return 0; } - DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); #endif @@ -1565,7 +1588,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; struct dentry *root = minor->debugfs_root; - if (adev->enable_mes) + if (adev->enable_mes && amdgpu_mes_log_enable) debugfs_create_file("amdgpu_mes_event_log", 0444, root, adev, &amdgpu_debugfs_mes_event_log_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7d4f93fea937..e11051271f71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximum log buffer size for MES */ struct amdgpu_mes_funcs; @@ -140,6 +141,12 @@ struct amdgpu_mes { /* ip specific functions */ const struct
amdgpu_mes_funcs *funcs; + + /* mes resource_1 bo*/ + struct amdgpu_bo *resource_1; + uint64_t resource_1_gpu_addr; + void *resource_1_addr; + }; struct amdgpu_mes_process { @@ -241,6 +248,15 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_map_legacy_queue_input { + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t mqd_addr; + uint64_t wptr_addr; +}; + struct mes_unmap_legacy_queue_input { enum amdgpu_unmap_queues_action action; uint32_t queue_type; @@ -317,6 +333,9 @@ struct amdgpu_mes_funcs { int (*remove_hw_queue)(struct amdgpu_mes *mes, struct mes_remove_queue_input *input); + int (*map_legacy_queue)(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input); + int (*unmap_legacy_queue)(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input); @@ -356,6 +375,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring); int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 1ca9d4ed8063..95d676ee207f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,6 +63,8 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 1fe21a70ddd0..d002b845d8ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -300,6 +300,7 @@ struct amdgpu_framebuffer { uint64_t tiling_flags; bool tmz_surface; + bool gfx12_dcc; /* caching for later use */ uint64_t address; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 7b8c03be1d9e..f61d117b0caf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -102,6 +102,7 @@ struct amdgpu_nbio_funcs { u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, u32 *supp_modes); u64 (*get_pcie_replay_count)(struct amdgpu_device *adev); + void (*set_reg_remap)(struct amdgpu_device *adev); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 010b0cb7693c..e32161f6b67a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -39,6 +39,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_vram_mgr.h" /** * DOC: amdgpu_object @@ -153,8 +154,10 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) else places[c].flags |= TTM_PL_FLAG_TOPDOWN; - if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) + if (abo->tbo.type == ttm_bo_type_kernel && + flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) places[c].flags |= TTM_PL_FLAG_CONTIGUOUS; + c++; } @@ -173,6 +176,12 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ? 
AMDGPU_PL_PREEMPT : TTM_PL_TT; places[c].flags = 0; + /* + * When GTT is just an alternative to VRAM make sure that we + * only use it as fallback and still try to fill up VRAM first. + */ + if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } @@ -595,9 +604,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; - if (adev->ras_enabled) - bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; - bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | AMDGPU_GEM_DOMAIN_GDS)) @@ -605,6 +611,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, else amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 2; + else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE)) bo->tbo.priority = 1; if (!bp->destroy) @@ -617,8 +625,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return r; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - bo->tbo.resource->mem_type == TTM_PL_VRAM && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else @@ -628,7 +635,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); + r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -758,7 +765,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, amdgpu_bo_size(shadow), NULL, fence, - true, false, false); + true, false, 0); } /** @@ -960,6 +967,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (!bo->placements[i].lpfn || (lpfn && lpfn < bo->placements[i].lpfn)) bo->placements[i].lpfn = lpfn; + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + bo->placements[i].mem_type == TTM_PL_VRAM) + bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -970,12 +981,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, ttm_bo_pin(&bo->tbo); - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); - if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + if (bo->tbo.resource->mem_type == TTM_PL_VRAM) { atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size); atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo), &adev->visible_pin_size); - } else if (domain == AMDGPU_GEM_DOMAIN_GTT) { + } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size); } @@ -1242,14 +1252,18 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new resource for backing the BO * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. 
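(With @new_mem threaded into the callback, both ends of a move can be traced before it happens, since bo->resource still points at the old placement at notify time. A stripped-down model of that callback shape, where the mem-type names and the print stand in for the real trace event:

#include <stdio.h>

enum mem_type { PL_SYSTEM, PL_TT, PL_VRAM };

struct resource { enum mem_type mem_type; };
struct bo { struct resource *resource; };

static const char *name(const struct resource *r)
{
	static const char * const n[] = { "SYSTEM", "TT", "VRAM" };

	return r ? n[r->mem_type] : "NONE";
}

/* move_notify runs before the move: bo->resource is still the old one. */
static void move_notify(struct bo *bo, struct resource *new_mem)
{
	printf("move: %s -> %s\n", name(bo->resource), name(new_mem));
}

int main(void)
{
	struct resource vram = { PL_VRAM }, gtt = { PL_TT };
	struct bo bo = { .resource = &gtt };

	move_notify(&bo, &vram);
	return 0;
}

Guarding both pointers with a NONE fallback matches the driver's "new_mem ? ... : -1" handling for BOs without a backing store.)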
*/ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct ttm_resource *old_mem = bo->resource; struct amdgpu_bo *abo; if (!amdgpu_bo_is_amdgpu_bo(bo)) @@ -1261,44 +1275,44 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) amdgpu_bo_kunmap(abo); if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && - bo->resource->mem_type != TTM_PL_SYSTEM) + old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); - /* remember the eviction */ - if (evict) - atomic64_inc(&adev->num_evictions); + /* move_notify is called before move happens */ + trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, + old_mem ? old_mem->mem_type : -1); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_resource *res = bo->tbo.resource; uint64_t size = amdgpu_bo_size(bo); struct drm_gem_object *obj; - unsigned int domain; bool shared; /* Abort if the BO doesn't currently have a backing store */ - if (!bo->tbo.resource) + if (!res) return; obj = &bo->tbo.base; shared = drm_gem_object_is_shared_for_memory_stats(obj); - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); - switch (domain) { - case AMDGPU_GEM_DOMAIN_VRAM: + switch (res->mem_type) { + case TTM_PL_VRAM: stats->vram += size; - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, res)) stats->visible_vram += size; if (shared) stats->vram_shared += size; break; - case AMDGPU_GEM_DOMAIN_GTT: + case TTM_PL_TT: stats->gtt += size; if (shared) stats->gtt_shared += size; break; - case AMDGPU_GEM_DOMAIN_CPU: + case TTM_PL_SYSTEM: default: stats->cpu += size; if (shared) @@ -1311,7 +1325,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) stats->requested_visible_vram += size; - if (domain != AMDGPU_GEM_DOMAIN_VRAM) { + if (res->mem_type != TTM_PL_VRAM) { stats->evicted_vram += size; if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) stats->evicted_visible_vram += size; @@ -1359,8 +1373,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) return; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true); + r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); if (!WARN_ON(r)) { + amdgpu_vram_mgr_set_cleared(bo->resource); amdgpu_bo_fence(abo, fence, false); dma_fence_put(fence); } @@ -1389,10 +1404,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - if (bo->resource->mem_type != TTM_PL_VRAM) - return 0; - - if (amdgpu_bo_in_cpu_visible_vram(abo)) + if (amdgpu_res_cpu_visible(adev, bo->resource)) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1415,7 +1427,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* this should never happen */ if (bo->resource->mem_type == TTM_PL_VRAM && - !amdgpu_bo_in_cpu_visible_vram(abo)) + !amdgpu_res_cpu_visible(adev, bo->resource)) return VM_FAULT_SIGBUS; ttm_bo_move_to_lru_tail_unlocked(bo); @@ -1579,6 +1591,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, */ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, 
struct seq_file *m) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct dma_buf_attachment *attachment; struct dma_buf *dma_buf; const char *placement; @@ -1586,22 +1599,39 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) u64 size; if (dma_resv_trylock(bo->tbo.base.resv)) { - unsigned int domain; - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); - switch (domain) { - case AMDGPU_GEM_DOMAIN_VRAM: - if (amdgpu_bo_in_cpu_visible_vram(bo)) - placement = "VRAM VISIBLE"; - else - placement = "VRAM"; - break; - case AMDGPU_GEM_DOMAIN_GTT: - placement = "GTT"; - break; - case AMDGPU_GEM_DOMAIN_CPU: - default: - placement = "CPU"; - break; + if (!bo->tbo.resource) { + placement = "NONE"; + } else { + switch (bo->tbo.resource->mem_type) { + case TTM_PL_VRAM: + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) + placement = "VRAM VISIBLE"; + else + placement = "VRAM"; + break; + case TTM_PL_TT: + placement = "GTT"; + break; + case AMDGPU_PL_GDS: + placement = "GDS"; + break; + case AMDGPU_PL_GWS: + placement = "GWS"; + break; + case AMDGPU_PL_OA: + placement = "OA"; + break; + case AMDGPU_PL_PREEMPT: + placement = "PREEMPTIBLE"; + break; + case AMDGPU_PL_DOORBELL: + placement = "DOORBELL"; + break; + case TTM_PL_SYSTEM: + default: + placement = "CPU"; + break; + } } dma_resv_unlock(bo->tbo.base.resv); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index be679c42b0b8..bc42ccbde659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -251,28 +251,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) } /** - * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM - */ -static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_res_cursor cursor; - - if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) - return false; - - amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); - while (cursor.remaining) { - if (cursor.start < adev->gmc.visible_vram_size) - return true; - - amdgpu_res_next(&cursor, cursor.size); - } - - return false; -} - -/** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) @@ -350,7 +328,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 94b310fdb719..189574d53ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -145,6 +145,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp) adev->virt.autoload_ucode_id = 0; break; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): ret = psp_init_cap_microcode(psp, ucode_prefix); ret &= psp_init_ta_microcode(psp, ucode_prefix); break; @@ -207,6 +208,7 
@@ static int psp_early_init(void *handle) psp->boot_time_tmr = false; fallthrough; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = false; break; @@ -217,6 +219,7 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 4): psp_v13_0_set_psp_funcs(psp); psp->boot_time_tmr = false; break; @@ -355,7 +358,8 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, bool ret = false; int i; - if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) return false; db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET; @@ -640,6 +644,20 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) } } +static bool psp_err_warn(struct psp_context *psp) +{ + struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem; + + /* This response indicates reg list is already loaded */ + if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) && + cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW && + cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST && + cmd->resp.status == TEE_ERROR_CANCEL) + return false; + + return true; +} + static int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, @@ -699,10 +717,13 @@ psp_cmd_submit_buf(struct psp_context *psp, dev_warn(psp->adev->dev, "failed to load ucode %s(0x%X) ", amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); - dev_warn(psp->adev->dev, - "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", - psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, - psp->cmd_buf_mem->resp.status); + if (psp_err_warn(psp)) + dev_warn( + psp->adev->dev, + "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), + psp->cmd_buf_mem->cmd_id, + psp->cmd_buf_mem->resp.status); /* If any firmware (including CAP) load fails under SRIOV, it should * return failure to stop the VF from initializing. 
* Also return failure in case of timeout @@ -847,6 +868,7 @@ static bool psp_skip_tmr(struct psp_context *psp) case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 14): return true; default: return false; @@ -1053,6 +1075,11 @@ static int psp_asd_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes) return 0; + /* bypass asd if display hardware is not available */ + if (!amdgpu_device_has_display_hardware(psp->adev) && + amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10)) + return 0; + psp->asd_context.mem_context.shared_mc_addr = 0; psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE; psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD; @@ -1353,6 +1380,9 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp, uint8_t dst_num_links = node_info.num_links; hive = amdgpu_get_xgmi_hive(psp->adev); + if (WARN_ON(!hive)) + return; + list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) { struct psp_xgmi_topology_info *mirror_top_info; int j; @@ -1445,7 +1475,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, (psp->xgmi_context.supports_extended_data && get_extended_data) || amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == - IP_VERSION(13, 0, 6); + IP_VERSION(13, 0, 6) || + amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == + IP_VERSION(13, 0, 14); bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 : psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG; @@ -1559,6 +1591,66 @@ static void psp_ras_ta_check_status(struct psp_context *psp) } } +static int psp_ras_send_cmd(struct psp_context *psp, + enum ras_command cmd_id, void *in, void *out) +{ + struct ta_ras_shared_memory *ras_cmd; + uint32_t cmd = cmd_id; + int ret = 0; + + if (!in) + return -EINVAL; + + mutex_lock(&psp->ras_context.mutex); + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + switch (cmd) { + case TA_RAS_COMMAND__ENABLE_FEATURES: + case TA_RAS_COMMAND__DISABLE_FEATURES: + memcpy(&ras_cmd->ras_in_message, + in, sizeof(ras_cmd->ras_in_message)); + break; + case TA_RAS_COMMAND__TRIGGER_ERROR: + memcpy(&ras_cmd->ras_in_message.trigger_error, + in, sizeof(ras_cmd->ras_in_message.trigger_error)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + memcpy(&ras_cmd->ras_in_message.address, + in, sizeof(ras_cmd->ras_in_message.address)); + break; + default: + dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd); + ret = -EINVAL; + goto err_out; + } + + ras_cmd->cmd_id = cmd; + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + + switch (cmd) { + case TA_RAS_COMMAND__TRIGGER_ERROR: + if (!ret && out) + memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + ret = -EINVAL; + else if (out) + memcpy(out, + &ras_cmd->ras_out_message.address, + sizeof(ras_cmd->ras_out_message.address)); + break; + default: + break; + } + +err_out: + mutex_unlock(&psp->ras_context.mutex); + + return ret; +} + int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { struct ta_ras_shared_memory *ras_cmd; @@ -1600,23 +1692,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable) { - struct ta_ras_shared_memory *ras_cmd; + enum ras_command 
cmd_id; int ret; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - if (enable) - ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; - else - ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES; - - ras_cmd->ras_in_message = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + cmd_id = enable ? + TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES; + ret = psp_ras_send_cmd(psp, cmd_id, info, NULL); if (ret) return -EINVAL; @@ -1640,6 +1724,8 @@ int psp_ras_terminate(struct psp_context *psp) psp->ras_context.context.initialized = false; + mutex_destroy(&psp->ras_context.mutex); + return ret; } @@ -1724,9 +1810,10 @@ int psp_ras_initialize(struct psp_context *psp) ret = psp_ta_load(psp, &psp->ras_context.context); - if (!ret && !ras_cmd->ras_status) + if (!ret && !ras_cmd->ras_status) { psp->ras_context.context.initialized = true; - else { + mutex_init(&psp->ras_context.mutex); + } else { if (ras_cmd->ras_status) dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); @@ -1740,12 +1827,12 @@ int psp_ras_initialize(struct psp_context *psp) int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info, uint32_t instance_mask) { - struct ta_ras_shared_memory *ras_cmd; struct amdgpu_device *adev = psp->adev; int ret; uint32_t dev_mask; + uint32_t ras_status = 0; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; switch (info->block_id) { @@ -1769,13 +1856,8 @@ int psp_ras_trigger_error(struct psp_context *psp, dev_mask &= AMDGPU_RAS_INST_MASK; info->sub_block_index |= dev_mask; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; - ras_cmd->ras_in_message.trigger_error = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status); if (ret) return -EINVAL; @@ -1785,9 +1867,9 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) + if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) return -EACCES; - else if (ras_cmd->ras_status) + else if (ras_status) return -EINVAL; return 0; @@ -1797,25 +1879,16 @@ int psp_ras_query_address(struct psp_context *psp, struct ta_ras_query_address_input *addr_in, struct ta_ras_query_address_output *addr_out) { - struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras_context.context.initialized) - return -EINVAL; - - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; - ras_cmd->ras_in_message.address = *addr_in; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); - if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + if (!psp->ras_context.context.initialized || + !addr_in || !addr_out) return -EINVAL; - *addr_out = ras_cmd->ras_out_message.address; + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out); - return 0; + return ret; } // ras end @@ -2260,6 +2333,15 @@ static int psp_hw_start(struct 
psp_context *psp) } } + if ((is_psp_fw_valid(psp->ipkeymgr_drv)) && + (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) { + ret = psp_bootloader_load_ipkeymgr_drv(psp); + if (ret) { + dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -2450,6 +2532,7 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, *type = GFX_FW_TYPE_DMUB; break; case AMDGPU_UCODE_ID_SDMA_UCODE_TH0: + case AMDGPU_UCODE_ID_SDMA_RS64: *type = GFX_FW_TYPE_SDMA_UCODE_TH0; break; case AMDGPU_UCODE_ID_SDMA_UCODE_TH1: @@ -2518,6 +2601,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_JPEG_RAM: *type = GFX_FW_TYPE_JPEG_RAM; break; + case AMDGPU_UCODE_ID_ISP: + *type = GFX_FW_TYPE_ISP; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -2617,10 +2703,12 @@ static int psp_load_p2s_table(struct psp_context *psp) struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE]; - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; - if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { uint32_t supp_vers = adev->flags & AMD_IS_APU ? 0x0036013D : 0x0036003C; if (psp->sos.fw_version < supp_vers) @@ -2647,7 +2735,8 @@ static int psp_load_smu_fw(struct psp_context *psp) * Skip SMU FW reloading in case of using BACO for runpm only, * as SMU is always alive. */ - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) @@ -3154,12 +3243,10 @@ int psp_ring_cmd_submit(struct psp_context *psp, int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *asd_hdr; int err = 0; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, "amdgpu/%s_asd.bin", chip_name); if (err) goto out; @@ -3178,12 +3265,10 @@ out: int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, "amdgpu/%s_toc.bin", chip_name); if (err) goto out; @@ -3273,6 +3358,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->ras_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_IPKEYMGR_DRV: + psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ipkeymgr_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: 
%d\n", desc->fw_type); break; @@ -3329,7 +3420,6 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev) int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *sos_hdr; const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; @@ -3339,8 +3429,7 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int fw_index = 0; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3565,11 +3654,9 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) { const struct common_firmware_header *hdr; struct amdgpu_device *adev = psp->adev; - char fw_name[PSP_FW_NAME_LEN]; int err; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name); if (err) return err; @@ -3595,7 +3682,6 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *cap_hdr_v1_0; struct amdgpu_firmware_info *info = NULL; int err = 0; @@ -3605,8 +3691,7 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) return -EINVAL; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_cap.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, "amdgpu/%s_cap.bin", chip_name); if (err) { if (err == -ENODEV) { dev_warn(adev->dev, "cap microcode does not exist, skip\n"); @@ -3680,7 +3765,6 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); int ret, idx; - char fw_name[100]; const struct firmware *usbc_pd_fw; struct amdgpu_bo *fw_buf_bo = NULL; uint64_t fw_pri_mc_addr; @@ -3694,8 +3778,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, if (!drm_dev_enter(ddev, &idx)) return -ENODEV; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s", buf); - ret = request_firmware(&usbc_pd_fw, fw_name, adev->dev); + ret = amdgpu_ucode_request(adev, &usbc_pd_fw, "amdgpu/%s", buf); if (ret) goto fail; @@ -3717,7 +3800,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr); rel_buf: - release_firmware(usbc_pd_fw); + amdgpu_ucode_release(&usbc_pd_fw); fail: if (ret) { dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index ee16f134ae92..74a96516c913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -73,8 +73,10 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV, PSP_BL__LOAD_INTFDRV = 0xD0000, - PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_IPKEYMGRDRV = 
0xF0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -117,6 +119,7 @@ struct psp_funcs { int (*bootloader_load_intf_drv)(struct psp_context *psp); int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); + int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -197,6 +200,7 @@ struct psp_xgmi_context { struct psp_ras_context { struct ta_context context; struct amdgpu_ras *ras; + struct mutex mutex; }; #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 @@ -336,6 +340,7 @@ struct psp_context { struct psp_bin_desc intf_drv; struct psp_bin_desc dbg_drv; struct psp_bin_desc ras_drv; + struct psp_bin_desc ipkeymgr_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -424,6 +429,9 @@ struct amdgpu_psp_funcs { #define psp_bootloader_load_ras_drv(psp) \ ((psp)->funcs->bootloader_load_ras_drv ? \ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0) +#define psp_bootloader_load_ipkeymgr_drv(psp) \ + ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \ + (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index ca5c86e5f7cd..0c856005df6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -334,7 +334,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size set_ta_context_funcs(psp, ta_type, &context); - if (!context->initialized) { + if (!context || !context->initialized) { dev_err(adev->dev, "TA is not initialized\n"); ret = -EINVAL; goto err_free_shared_buf; @@ -348,6 +348,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size context->session_id = ta_id; + mutex_lock(&psp->ras_context.mutex); ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); if (ret) goto err_free_shared_buf; @@ -366,6 +367,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size ret = -EFAULT; err_free_shared_buf: + mutex_unlock(&psp->ras_context.mutex); kfree(shared_buf); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8ebab6f22e5a..d0307c55da50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -120,7 +120,11 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) /* typical ECC bad page rate is 1 bad page per 100MB VRAM */ #define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) -#define MAX_UMC_POISON_POLLING_TIME_ASYNC 100 //ms +#define MAX_UMC_POISON_POLLING_TIME_ASYNC 300 //ms + +#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms + +#define MAX_FLUSH_RETIRE_DWORK_TIMES 100 enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, @@ -1045,6 +1049,7 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, struct ras_manager *ras_mgr, struct ras_err_data *err_data, + struct ras_query_context *qctx, const char *blk_name, bool is_ue, bool is_de) @@ -1052,27 +1057,28 @@ static void amdgpu_ras_error_print_error_data(struct 
amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info; struct ras_err_node *err_node; struct ras_err_info *err_info; + u64 event_id = qctx->evid.event_id; if (is_ue) { for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; if (err_info->ue_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new uncorrectable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new uncorrectable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ue_count, + blk_name); } } for_each_ras_error(err_node, &ras_mgr->err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld uncorrectable hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld uncorrectable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); } } else { @@ -1081,44 +1087,44 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; if (err_info->de_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new deferred hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->de_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new deferred hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->de_count, + blk_name); } } for_each_ras_error(err_node, &ras_mgr->err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld deferred hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, - err_info->de_count, blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld deferred hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->de_count, blk_name); } } else { for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; if (err_info->ce_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new correctable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new correctable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ce_count, + blk_name); } } for_each_ras_error(err_node, &ras_mgr->err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld correctable hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, - err_info->ce_count, blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld correctable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->ce_count, blk_name); } } } @@ -1131,77 +1137,79 @@ static inline bool err_data_has_source_info(struct ras_err_data *data) static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, struct ras_query_if *query_if, - struct 
ras_err_data *err_data) + struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head); const char *blk_name = get_ras_block_str(&query_if->head); + u64 event_id = qctx->evid.event_id; if (err_data->ce_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, blk_name, false, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld correctable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld correctable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ce_count, + blk_name); } else { - dev_info(adev->dev, "%ld correctable hardware errors " - "detected in %s block\n", - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ce_count, + blk_name); } } if (err_data->ue_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, blk_name, true, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld uncorrectable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld uncorrectable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ue_count, + blk_name); } else { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in %s block\n", - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ue_count, + blk_name); } } if (err_data->de_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, blk_name, false, true); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld deferred hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.de_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld deferred hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.de_count, + blk_name); } else { - dev_info(adev->dev, "%ld deferred hardware errors " - "detected in %s block\n", - ras_mgr->err_data.de_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors " + "detected in %s block\n", + ras_mgr->err_data.de_count, + blk_name); } } } 
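Note on the two hunks above: every line of the RAS error report is converted from a plain dev_info() to RAS_EVENT_LOG(), threading through the event_id carried by the new struct ras_query_context so that all messages belonging to one fatal/poison event can be correlated in the kernel log. The macro itself is defined elsewhere in amdgpu_ras.h and is not part of this diff; a minimal sketch of the idiom, assuming the id is printed as a "{seqno}" prefix and that RAS_EVENT_INVALID_ID suppresses it, could look like:

    /* Illustrative sketch only -- the real RAS_EVENT_LOG lives in
     * amdgpu_ras.h and its exact format string may differ.
     */
    #define RAS_EVENT_LOG_SKETCH(adev, id, fmt, ...)                      \
    do {                                                                  \
        if ((id) != RAS_EVENT_INVALID_ID)                                 \
            dev_info((adev)->dev, "{%llu}" fmt, (id), ##__VA_ARGS__);     \
        else                                                              \
            dev_info((adev)->dev, fmt, ##__VA_ARGS__);                    \
    } while (0)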
@@ -1244,6 +1252,10 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, { struct ras_manager *obj; + /* in resume phase, no need to create aca fs node */ + if (adev->in_suspend || amdgpu_in_reset(adev)) + return 0; + obj = get_ras_manager(adev, blk); if (!obj) return -EINVAL; @@ -1265,7 +1277,8 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) } static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum aca_error_type type, struct ras_err_data *err_data) + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *obj; @@ -1273,7 +1286,7 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu if (!obj) return -EINVAL; - return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data); + return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx); } ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, @@ -1284,16 +1297,20 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a .head = obj->head, }; + if (!amdgpu_ras_get_error_query_ready(obj->adev)) + return sysfs_emit(buf, "Query currently inaccessible\n"); + if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; - return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, - "ce", info.ce_count); + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.de_count); } static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, struct ras_query_if *info, struct ras_err_data *err_data, + struct ras_query_context *qctx, unsigned int error_query_mode) { enum amdgpu_ras_block blk = info ? 
info->head.block : AMDGPU_RAS_BLOCK_COUNT; @@ -1329,17 +1346,21 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } } else { if (amdgpu_aca_is_enabled(adev)) { - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx); + if (ret) + return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx); if (ret) return ret; - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx); if (ret) return ret; } else { /* FIXME: add code to check return value later */ - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx); } } @@ -1347,10 +1368,13 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } /* query/inject/cure begin */ -int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev, + struct ras_query_if *info, + enum ras_event_type type) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; + struct ras_query_context qctx; unsigned int error_query_mode; int ret; @@ -1364,9 +1388,20 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) return -EINVAL; + memset(&qctx, 0, sizeof(qctx)); + qctx.evid.type = type; + qctx.evid.event_id = amdgpu_ras_acquire_event_id(adev, type); + + if (!down_read_trylock(&adev->reset_domain->sem)) { + ret = -EIO; + goto out_fini_err_data; + } + ret = amdgpu_ras_query_error_status_helper(adev, info, &err_data, + &qctx, error_query_mode); + up_read(&adev->reset_domain->sem); if (ret) goto out_fini_err_data; @@ -1376,7 +1411,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i info->ce_count = obj->err_data.ce_count; info->de_count = obj->err_data.de_count; - amdgpu_ras_error_generate_report(adev, info, &err_data); + amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx); out_fini_err_data: amdgpu_ras_error_data_fini(&err_data); @@ -1384,15 +1419,17 @@ out_fini_err_data: return ret; } +int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +{ + return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID); +} + int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block block) { struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; - struct amdgpu_hive_info *hive; - int hive_ras_recovery = 0; if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", @@ -1404,15 +1441,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, !amdgpu_ras_get_aca_debug_mode(adev)) return -EOPNOTSUPP; - hive = amdgpu_get_xgmi_hive(adev); - if (hive) { - hive_ras_recovery = 
atomic_read(&hive->ras_recovery); - amdgpu_put_xgmi_hive(hive); - } - /* skip ras error reset in gpu reset */ - if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || - hive_ras_recovery) && + if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) && ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode))) return -EOPNOTSUPP; @@ -1703,6 +1733,39 @@ static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev, return sysfs_emit(buf, "schema: 0x%x\n", con->schema); } +static struct { + enum ras_event_type type; + const char *name; +} dump_event[] = { + {RAS_EVENT_TYPE_FATAL, "Fatal Error"}, + {RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"}, + {RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"}, +}; + +static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, event_state_attr); + struct ras_event_manager *event_mgr = con->event_mgr; + struct ras_event_state *event_state; + int i, size = 0; + + if (!event_mgr) + return -EINVAL; + + size += sysfs_emit_at(buf, size, "current seqno: %llu\n", atomic64_read(&event_mgr->seqno)); + for (i = 0; i < ARRAY_SIZE(dump_event); i++) { + event_state = &event_mgr->event_state[dump_event[i].type]; + size += sysfs_emit_at(buf, size, "%s: count:%llu, last_seqno:%llu\n", + dump_event[i].name, + atomic64_read(&event_state->count), + event_state->last_seqno); + } + + return (ssize_t)size; +} + static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1720,6 +1783,7 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) &con->features_attr.attr, &con->version_attr.attr, &con->schema_attr.attr, + &con->event_state_attr.attr, NULL }; struct attribute_group group = { @@ -1738,6 +1802,9 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, { struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + if (amdgpu_aca_is_enabled(adev)) + return 0; + if (!obj || obj->attr_inuse) return -EINVAL; @@ -1772,6 +1839,9 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, { struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + if (amdgpu_aca_is_enabled(adev)) + return 0; + if (!obj || !obj->attr_inuse) return -EINVAL; @@ -1884,6 +1954,23 @@ static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, obj, &amdgpu_ras_debugfs_ops); } +static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev) +{ + bool ret; + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): + ret = true; + break; + default: + ret = false; + break; + } + + return ret; +} + void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1910,10 +1997,12 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) } } - if (amdgpu_aca_is_enabled(adev)) - amdgpu_aca_smu_debugfs_init(adev, dir); - else - amdgpu_mca_smu_debugfs_init(adev, dir); + if (amdgpu_ras_aca_is_supported(adev)) { + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_smu_debugfs_init(adev, dir); + else + amdgpu_mca_smu_debugfs_init(adev, dir); + } } /* debugfs end */ @@ -1927,6 +2016,8 @@ static DEVICE_ATTR(version, 0444, amdgpu_ras_sysfs_version_show, NULL); static DEVICE_ATTR(schema, 0444, amdgpu_ras_sysfs_schema_show, NULL); +static DEVICE_ATTR(event_state, 0444, + amdgpu_ras_sysfs_event_state_show, NULL); 
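Note: amdgpu_ras_sysfs_event_state_show() above builds a multi-line attribute with sysfs_emit_at(), which writes at the given offset inside the PAGE_SIZE sysfs buffer and returns the number of bytes emitted, so the handler can simply accumulate offsets. A minimal sketch of the same idiom, using a hypothetical read-only attribute:

    /* Hypothetical attribute using the same sysfs_emit_at()
     * accumulation pattern as event_state above.
     */
    static ssize_t example_counters_show(struct device *dev,
                                         struct device_attribute *attr,
                                         char *buf)
    {
        int size = 0;

        size += sysfs_emit_at(buf, size, "ue: %u\n", 0);
        size += sysfs_emit_at(buf, size, "ce: %u\n", 0);

        return size; /* total bytes written, capped at PAGE_SIZE */
    }
    static DEVICE_ATTR_RO(example_counters);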
static int amdgpu_ras_fs_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1937,6 +2028,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) &con->features_attr.attr, &con->version_attr.attr, &con->schema_attr.attr, + &con->event_state_attr.attr, NULL }; struct bin_attribute *bin_attrs[] = { @@ -1959,6 +2051,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) con->schema_attr = dev_attr_schema; sysfs_attr_init(attrs[2]); + /* add event_state entry */ + con->event_state_attr = dev_attr_event_state; + sysfs_attr_init(attrs[3]); + if (amdgpu_bad_page_threshold != 0) { /* add bad_page_features entry */ bin_attr_gpu_vram_bad_pages.private = NULL; @@ -2022,8 +2118,16 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * struct amdgpu_device *adev = obj->adev; struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, obj->head.block, 0); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION; + u64 event_id; + int ret; - if (!block_obj) + if (!block_obj || !con) + return; + + ret = amdgpu_ras_mark_ras_event(adev, type); + if (ret) return; /* both query_poison_status and handle_poison_consumption are optional, @@ -2041,26 +2145,49 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } } - amdgpu_umc_poison_handler(adev, obj->head.block, false); + amdgpu_umc_poison_handler(adev, obj->head.block, 0); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); - /* gpu reset is fallback for failed and default cases */ - if (poison_stat) { - dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n", - block_obj->ras_comm.name); + /* gpu reset is fallback for failed and default cases. + * For RMA case, amdgpu_umc_poison_handler will handle gpu reset. 
+ */ + if (poison_stat && !con->is_rma) { + event_id = amdgpu_ras_acquire_event_id(adev, type); + RAS_EVENT_LOG(adev, event_id, + "GPU reset for %s RAS poison consumption is issued!\n", + block_obj->ras_comm.name); amdgpu_ras_reset_gpu(adev); - } else { - amdgpu_gfx_poison_consumption_handler(adev, entry); } + + if (!poison_stat) + amdgpu_gfx_poison_consumption_handler(adev, entry); } static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj, struct amdgpu_iv_entry *entry) { - dev_info(obj->adev->dev, - "Poison is created\n"); + struct amdgpu_device *adev = obj->adev; + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION; + u64 event_id; + int ret; + + ret = amdgpu_ras_mark_ras_event(adev, type); + if (ret) + return; + + event_id = amdgpu_ras_acquire_event_id(adev, type); + RAS_EVENT_LOG(adev, event_id, "Poison is created\n"); + + if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) { + struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev); + + atomic_inc(&con->page_retirement_req_cnt); + atomic_inc(&con->poison_creation_count); + + wake_up(&con->page_retirement_wq); + } } static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ -2080,6 +2207,7 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, /* Let IP handle its data, maybe we need get the output * from the callback to update the error type/count, etc */ + amdgpu_ras_set_fed(obj->adev, true); ret = data->cb(obj->adev, &err_data, entry); /* ue will trigger an interrupt, and in that case * we need do a reset to recovery the whole system. @@ -2140,12 +2268,15 @@ static void amdgpu_ras_interrupt_process_handler(struct work_struct *work) int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info) { - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); - struct ras_ih_data *data = &obj->ih_data; + struct ras_manager *obj; + struct ras_ih_data *data; + obj = amdgpu_ras_find_obj(adev, &info->head); if (!obj) return -EINVAL; + data = &obj->ih_data; + if (data->inuse == 0) return 0; @@ -2242,7 +2373,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) /* ih end */ /* traversal all IPs except NBIO to query error counter */ -static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) +static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev, enum ras_event_type type) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; @@ -2275,7 +2406,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) IP_VERSION(13, 0, 2))) continue; - amdgpu_ras_query_error_status(adev, &info); + amdgpu_ras_query_error_status_with_event(adev, &info, type); if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) && @@ -2371,7 +2502,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, - data->bps[i].retired_page); + data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT); if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; else if (status == -ENOENT) @@ -2384,6 +2515,44 @@ out: return ret; } +static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive, bool status) +{ + struct amdgpu_device *tmp_adev; + + if (hive) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + amdgpu_ras_set_fed(tmp_adev, status); + } else { + amdgpu_ras_set_fed(adev, status); + 
} +} + +bool amdgpu_ras_in_recovery(struct amdgpu_device *adev) +{ + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int hive_ras_recovery = 0; + + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) + return true; + + return false; +} + +static enum ras_event_type amdgpu_ras_get_fatal_error_event(struct amdgpu_device *adev) +{ + if (amdgpu_ras_intr_triggered()) + return RAS_EVENT_TYPE_FATAL; + else + return RAS_EVENT_TYPE_POISON_CONSUMPTION; +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = @@ -2392,9 +2561,23 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + enum ras_event_type type; - if (hive) + if (hive) { atomic_set(&hive->ras_recovery, 1); + + /* If any device which is part of the hive received RAS fatal + * error interrupt, set fatal error status on all. This + * condition will need a recovery, and flag will be cleared + * as part of recovery. + */ + list_for_each_entry(remote_adev, &hive->device_list, + gmc.xgmi.head) + if (amdgpu_ras_get_fed_status(remote_adev)) { + amdgpu_ras_set_fed_all(adev, hive, true); + break; + } + } if (!ras->disable_ras_err_cnt_harvest) { /* Build list of devices to query RAS related errors */ @@ -2406,10 +2589,11 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) device_list_handle = &device_list; } + type = amdgpu_ras_get_fatal_error_event(adev); list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) { amdgpu_ras_query_err_status(remote_adev); - amdgpu_ras_log_on_err_counter(remote_adev); + amdgpu_ras_log_on_err_counter(remote_adev, type); } } @@ -2420,6 +2604,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; + reset_context.src = AMDGPU_RESET_SRC_RAS; /* Perform full reset in fatal error mode */ if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) @@ -2439,18 +2624,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - /* For any RAS error that needs a full reset to - * recover, set the fatal error status - */ - if (hive) { - list_for_each_entry(remote_adev, - &hive->device_list, - gmc.xgmi.head) - amdgpu_ras_set_fed(remote_adev, - true); - } else { - amdgpu_ras_set_fed(adev, true); - } psp_fatal_error_recovery_quirk(&adev->psp); } } @@ -2516,9 +2689,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, goto out; } - amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE); + amdgpu_ras_reserve_page(adev, bps[i].retired_page); memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); data->count++; @@ -2674,10 +2845,239 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) +{ + int ret = 0; + struct ras_poison_msg poison_msg; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + memset(&poison_msg, 0, sizeof(poison_msg)); + poison_msg.block = 
block; + poison_msg.pasid = pasid; + poison_msg.reset = reset; + poison_msg.pasid_fn = pasid_fn; + poison_msg.data = data; + + ret = kfifo_put(&con->poison_fifo, poison_msg); + if (!ret) { + dev_err(adev->dev, "Poison message fifo is full!\n"); + return -ENOSPC; + } + + return 0; +} + +static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, + struct ras_poison_msg *poison_msg) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + return kfifo_get(&con->poison_fifo, poison_msg); +} + +static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) +{ + mutex_init(&ecc_log->lock); + + /* Set any value as siphash key */ + memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key)); + + INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); + ecc_log->de_queried_count = 0; + ecc_log->prev_de_queried_count = 0; +} + +static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) +{ + struct radix_tree_iter iter; + void __rcu **slot; + struct ras_ecc_err *ecc_err; + + mutex_lock(&ecc_log->lock); + radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) { + ecc_err = radix_tree_deref_slot(slot); + kfree(ecc_err->err_pages.pfn); + kfree(ecc_err); + radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot); + } + mutex_unlock(&ecc_log->lock); + + mutex_destroy(&ecc_log->lock); + ecc_log->de_queried_count = 0; + ecc_log->prev_de_queried_count = 0; +} + +static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con, + uint32_t delayed_ms) +{ + int ret; + + mutex_lock(&con->umc_ecc_log.lock); + ret = radix_tree_tagged(&con->umc_ecc_log.de_page_tree, + UMC_ECC_NEW_DETECTED_TAG); + mutex_unlock(&con->umc_ecc_log.lock); + + if (ret) + schedule_delayed_work(&con->page_retirement_dwork, + msecs_to_jiffies(delayed_ms)); + + return ret ? 
true : false; +} + +static void amdgpu_ras_do_page_retirement(struct work_struct *work) +{ + struct amdgpu_ras *con = container_of(work, struct amdgpu_ras, + page_retirement_dwork.work); + struct amdgpu_device *adev = con->adev; + struct ras_err_data err_data; + unsigned long err_cnt; + + /* If gpu reset is ongoing, delay retiring the bad pages */ + if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) { + amdgpu_ras_schedule_retirement_dwork(con, + AMDGPU_RAS_RETIRE_PAGE_INTERVAL * 3); + return; + } + + amdgpu_ras_error_data_init(&err_data); + + amdgpu_umc_handle_bad_pages(adev, &err_data); + err_cnt = err_data.err_addr_cnt; + + amdgpu_ras_error_data_fini(&err_data); + + if (err_cnt && con->is_rma) + amdgpu_ras_reset_gpu(adev); + + amdgpu_ras_schedule_retirement_dwork(con, + AMDGPU_RAS_RETIRE_PAGE_INTERVAL); +} + +static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, + uint32_t poison_creation_count) +{ + int ret = 0; + struct ras_ecc_log_info *ecc_log; + struct ras_query_if info; + uint32_t timeout = 0; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + uint64_t de_queried_count; + uint32_t new_detect_count, total_detect_count; + uint32_t need_query_count = poison_creation_count; + bool query_data_timeout = false; + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION; + + memset(&info, 0, sizeof(info)); + info.head.block = AMDGPU_RAS_BLOCK__UMC; + + ecc_log = &ras->umc_ecc_log; + total_detect_count = 0; + do { + ret = amdgpu_ras_query_error_status_with_event(adev, &info, type); + if (ret) + return ret; + + de_queried_count = ecc_log->de_queried_count; + if (de_queried_count > ecc_log->prev_de_queried_count) { + new_detect_count = de_queried_count - ecc_log->prev_de_queried_count; + ecc_log->prev_de_queried_count = de_queried_count; + timeout = 0; + } else { + new_detect_count = 0; + } + + if (new_detect_count) { + total_detect_count += new_detect_count; + } else { + if (!timeout && need_query_count) + timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC; + + if (timeout) { + if (!--timeout) { + query_data_timeout = true; + break; + } + msleep(1); + } + } + } while (total_detect_count < need_query_count); + + if (query_data_timeout) { + dev_warn(adev->dev, "Can't find deferred error! 
count: %u\n", + (need_query_count - total_detect_count)); + return -ENOENT; + } + + if (total_detect_count) + schedule_delayed_work(&ras->page_retirement_dwork, 0); + + return 0; +} + +static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_poison_msg msg; + int ret; + + do { + ret = kfifo_get(&con->poison_fifo, &msg); + } while (ret); +} + +static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, + uint32_t msg_count, uint32_t *gpu_reset) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint32_t reset_flags = 0, reset = 0; + struct ras_poison_msg msg; + int ret, i; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + for (i = 0; i < msg_count; i++) { + ret = amdgpu_ras_get_poison_req(adev, &msg); + if (!ret) + continue; + + if (msg.pasid_fn) + msg.pasid_fn(adev, msg.pasid, msg.data); + + reset_flags |= msg.reset; + } + + /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */ + if (reset_flags && !con->is_rma) { + if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + else + reset = reset_flags; + + flush_delayed_work(&con->page_retirement_dwork); + + con->gpu_reset_flags |= reset; + amdgpu_ras_reset_gpu(adev); + + *gpu_reset = reset; + + /* Wait for gpu recovery to complete */ + flush_work(&con->recovery_work); + } + + return 0; +} + static int amdgpu_ras_page_retirement_thread(void *param) { struct amdgpu_device *adev = (struct amdgpu_device *)param; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint32_t poison_creation_count, msg_count; + uint32_t gpu_reset; + int ret; while (!kthread_should_stop()) { @@ -2688,13 +3088,62 @@ static int amdgpu_ras_page_retirement_thread(void *param) if (kthread_should_stop()) break; - dev_info(adev->dev, "Start processing page retirement. 
request:%d\n", - atomic_read(&con->page_retirement_req_cnt)); + gpu_reset = 0; - atomic_dec(&con->page_retirement_req_cnt); + do { + poison_creation_count = atomic_read(&con->poison_creation_count); + ret = amdgpu_ras_poison_creation_handler(adev, poison_creation_count); + if (ret == -EIO) + break; + + if (poison_creation_count) { + atomic_sub(poison_creation_count, &con->poison_creation_count); + atomic_sub(poison_creation_count, &con->page_retirement_req_cnt); + } + } while (atomic_read(&con->poison_creation_count)); + + if (ret != -EIO) { + msg_count = kfifo_len(&con->poison_fifo); + if (msg_count) { + ret = amdgpu_ras_poison_consumption_handler(adev, + msg_count, &gpu_reset); + if ((ret != -EIO) && + (gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET)) + atomic_sub(msg_count, &con->page_retirement_req_cnt); + } + } + + if ((ret == -EIO) || (gpu_reset == AMDGPU_RAS_GPU_RESET_MODE1_RESET)) { + /* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */ + /* Clear poison creation request */ + atomic_set(&con->poison_creation_count, 0); + + /* Clear poison fifo */ + amdgpu_ras_clear_poison_fifo(adev); + + /* Clear all poison requests */ + atomic_set(&con->page_retirement_req_cnt, 0); + + if (ret == -EIO) { + /* Wait for mode-1 reset to complete */ + down_read(&adev->reset_domain->sem); + up_read(&adev->reset_domain->sem); + } - amdgpu_umc_bad_page_polling_timeout(adev, - false, MAX_UMC_POISON_POLLING_TIME_ASYNC); + /* Wake up work to save bad pages to eeprom */ + schedule_delayed_work(&con->page_retirement_dwork, 0); + } else if (gpu_reset) { + /* gpu just completed mode-2 reset or other reset */ + /* Clear poison consumption messages cached in fifo */ + msg_count = kfifo_len(&con->poison_fifo); + if (msg_count) { + amdgpu_ras_clear_poison_fifo(adev); + atomic_sub(msg_count, &con->page_retirement_req_cnt); + } + + /* Wake up work to save bad pages to eeprom */ + schedule_delayed_work(&con->page_retirement_dwork, 0); + } } return 0; @@ -2705,7 +3154,6 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data **data; u32 max_eeprom_records_count = 0; - bool exc_err_limit = false; int ret; if (!con || amdgpu_sriov_vf(adev)) @@ -2742,12 +3190,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) */ if (adev->gmc.xgmi.pending_reset) return 0; - ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit); + ret = amdgpu_ras_eeprom_init(&con->eeprom_control); /* - * This calling fails when exc_err_limit is true or + * This calling fails when is_rma is true or * ret != 0. 
*/ - if (exc_err_limit || ret) + if (con->is_rma || ret) goto free; if (con->eeprom_control.ras_num_recs) { @@ -2763,9 +3211,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } } + mutex_init(&con->page_rsv_lock); + INIT_KFIFO(con->poison_fifo); mutex_init(&con->page_retirement_lock); init_waitqueue_head(&con->page_retirement_wq); atomic_set(&con->page_retirement_req_cnt, 0); + atomic_set(&con->poison_creation_count, 0); con->page_retirement_thread = kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); if (IS_ERR(con->page_retirement_thread)) { @@ -2773,6 +3224,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); } + INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement); + amdgpu_ras_ecc_log_init(&con->umc_ecc_log); #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) @@ -2791,7 +3244,7 @@ out: * Except error threshold exceeding case, other failure cases in this * function would not fail amdgpu driver init. */ - if (!exc_err_limit) + if (!con->is_rma) ret = 0; else ret = -EINVAL; @@ -2803,18 +3256,33 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data = con->eh_data; + int max_flush_timeout = MAX_FLUSH_RETIRE_DWORK_TIMES; + bool ret; /* recovery_init failed to init it, fini is useless */ if (!data) return 0; + /* Save all cached bad pages to eeprom */ + do { + flush_delayed_work(&con->page_retirement_dwork); + ret = amdgpu_ras_schedule_retirement_dwork(con, 0); + } while (ret && max_flush_timeout--); + if (con->page_retirement_thread) kthread_stop(con->page_retirement_thread); atomic_set(&con->page_retirement_req_cnt, 0); + atomic_set(&con->poison_creation_count, 0); + + mutex_destroy(&con->page_rsv_lock); cancel_work_sync(&con->recovery_work); + cancel_delayed_work_sync(&con->page_retirement_dwork); + + amdgpu_ras_ecc_log_fini(&con->umc_ecc_log); + mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); @@ -2831,6 +3299,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): return true; default: return false; @@ -2842,6 +3311,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 14): return true; default: return false; @@ -3036,6 +3506,60 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev) AMDGPU_RAS_ERROR__PARITY; } +static void ras_event_mgr_init(struct ras_event_manager *mgr) +{ + struct ras_event_state *event_state; + int i; + + memset(mgr, 0, sizeof(*mgr)); + atomic64_set(&mgr->seqno, 0); + + for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) { + event_state = &mgr->event_state[i]; + event_state->last_seqno = RAS_EVENT_INVALID_ID; + atomic64_set(&event_state->count, 0); + } +} + +static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_hive_info *hive; + + if (!ras) + return; + + hive = amdgpu_get_xgmi_hive(adev); + ras->event_mgr = hive ? 
&hive->event_mgr : &ras->__event_mgr; + + /* init event manager with node 0 on xgmi system */ + if (!amdgpu_in_reset(adev)) { + if (!hive || adev->gmc.xgmi.node_id == 0) + ras_event_mgr_init(ras->event_mgr); + } + + if (hive) + amdgpu_put_xgmi_hive(hive); +} + +static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con || (adev->flags & AMD_IS_APU)) + return; + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): + con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE; + break; + default: + break; + } +} + int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -3141,11 +3665,22 @@ int amdgpu_ras_init(struct amdgpu_device *adev) /* Get RAS schema for particular SOC */ con->schema = amdgpu_get_ras_schema(adev); + amdgpu_ras_init_reserved_vram_size(adev); + if (amdgpu_ras_fs_init(adev)) { r = -EINVAL; goto release_con; } + if (amdgpu_ras_aca_is_supported(adev)) { + if (amdgpu_aca_is_enabled(adev)) + r = amdgpu_aca_init(adev); + else + r = amdgpu_mca_init(adev); + if (r) + goto release_con; + } + dev_info(adev->dev, "RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", adev->ras_hw_enabled, adev->ras_enabled); @@ -3352,23 +3887,30 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) struct amdgpu_ras_block_object *obj; int r; - /* Guest side doesn't need init ras feature */ - if (amdgpu_sriov_vf(adev)) - return 0; + amdgpu_ras_event_mgr_init(adev); - if (amdgpu_aca_is_enabled(adev)) { - if (amdgpu_in_reset(adev)) - r = amdgpu_aca_reset(adev); - else - r = amdgpu_aca_init(adev); - if (r) - return r; + if (amdgpu_ras_aca_is_supported(adev)) { + if (amdgpu_in_reset(adev)) { + if (amdgpu_aca_is_enabled(adev)) + r = amdgpu_aca_reset(adev); + else + r = amdgpu_mca_reset(adev); + if (r) + return r; + } - amdgpu_ras_set_aca_debug_mode(adev, false); - } else { - amdgpu_ras_set_mca_debug_mode(adev, false); + if (!amdgpu_sriov_vf(adev)) { + if (amdgpu_aca_is_enabled(adev)) + amdgpu_ras_set_aca_debug_mode(adev, false); + else + amdgpu_ras_set_mca_debug_mode(adev, false); + } } + /* Guest side doesn't need init ras feature */ + if (amdgpu_sriov_vf(adev)) + return 0; + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { obj = node->ras_obj; if (!obj) { @@ -3436,8 +3978,12 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) amdgpu_ras_fs_fini(adev); amdgpu_ras_interrupt_remove_all(adev); - if (amdgpu_aca_is_enabled(adev)) - amdgpu_aca_fini(adev); + if (amdgpu_ras_aca_is_supported(adev)) { + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_fini(adev); + else + amdgpu_mca_fini(adev); + } WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared"); @@ -3472,14 +4018,90 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) atomic_set(&ras->fed, !!status); } +static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (!ras) + return NULL; + + return ras->event_mgr; +} + +int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, + const void *caller) +{ + struct ras_event_manager *event_mgr; + struct ras_event_state *event_state; + int ret = 0; + + if (type >= RAS_EVENT_TYPE_COUNT) { + ret = -EINVAL; + goto out; + } + + event_mgr = __get_ras_event_mgr(adev); + if (!event_mgr) { + ret = 
-EINVAL; + goto out; + } + + event_state = &event_mgr->event_state[type]; + event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno); + atomic64_inc(&event_state->count); + +out: + if (ret && caller) + dev_warn(adev->dev, "failed mark ras event (%d) in %ps, ret:%d\n", + (int)type, caller, ret); + + return ret; +} + +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type) +{ + struct ras_event_manager *event_mgr; + u64 id; + + if (type >= RAS_EVENT_TYPE_COUNT) + return RAS_EVENT_INVALID_ID; + + switch (type) { + case RAS_EVENT_TYPE_FATAL: + case RAS_EVENT_TYPE_POISON_CREATION: + case RAS_EVENT_TYPE_POISON_CONSUMPTION: + event_mgr = __get_ras_event_mgr(adev); + if (!event_mgr) + return RAS_EVENT_INVALID_ID; + + id = event_mgr->event_state[type].last_seqno; + break; + case RAS_EVENT_TYPE_INVALID: + default: + id = RAS_EVENT_INVALID_ID; + break; + } + + return id; +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + enum ras_event_type type = RAS_EVENT_TYPE_FATAL; + u64 event_id; + + if (amdgpu_ras_mark_ras_event(adev, type)) + return; - dev_info(adev->dev, "uncorrectable hardware error" - "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + event_id = amdgpu_ras_acquire_event_id(adev, type); + RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" + "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + + amdgpu_ras_set_fed(adev, true); ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); } @@ -3664,6 +4286,12 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + /* mode1 is the only selection for RMA status */ + if (ras->is_rma) { + ras->gpu_reset_flags = 0; + ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; + } + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work); return 0; @@ -3996,19 +4624,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) { - struct ras_err_addr *mca_err_addr; - - mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL); - if (!mca_err_addr) - return; - - INIT_LIST_HEAD(&mca_err_addr->node); - - mca_err_addr->err_status = err_addr->err_status; - mca_err_addr->err_ipid = err_addr->err_ipid; - mca_err_addr->err_addr = err_addr->err_addr; - - list_add_tail(&mca_err_addr->node, &err_info->err_addr_list); + /* This function will be retired. */ + return; } void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) @@ -4092,64 +4709,74 @@ int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, #define mmMP0_SMN_C2PMSG_92 0x1609C #define mmMP0_SMN_C2PMSG_126 0x160BE static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, - u32 instance, u32 boot_error) + u32 instance) { u32 socket_id, aid_id, hbm_id; - u32 reg_data; + u32 fw_status; + u32 boot_error; u64 reg_addr; - socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error); - aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error); - hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error); - /* The pattern for smn addressing in other SOC could be different from * the one for aqua_vanjaram. We should revisit the code if the pattern * is changed. 
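The event-ID scheme above hands out IDs from a single monotonically increasing sequence counter shared by all event types, with each type recording the last ID it received. A small C model of that scheme, simplified to two event types with illustrative names:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

enum ev_type { EV_FATAL, EV_POISON_CREATION, EV_TYPE_COUNT };

/* One shared sequence counter; each type remembers the last ID it
 * was handed, mirroring ras_event_manager/ras_event_state. */
struct ev_mgr {
        atomic_uint_fast64_t seqno;
        uint64_t last_seqno[EV_TYPE_COUNT];
};

static uint64_t mark_event(struct ev_mgr *m, enum ev_type type)
{
        /* atomic_fetch_add() returns the old value, so the +1
         * mirrors atomic64_inc_return() */
        m->last_seqno[type] = atomic_fetch_add(&m->seqno, 1) + 1;
        return m->last_seqno[type];
}

int main(void)
{
        struct ev_mgr mgr = { 0 };

        printf("fatal id:  %llu\n",
               (unsigned long long)mark_event(&mgr, EV_FATAL));
        printf("poison id: %llu\n",
               (unsigned long long)mark_event(&mgr, EV_POISON_CREATION));
        return 0;
}

A single counter keeps IDs globally ordered across types, so interleaved fatal and poison events can be correlated in the log by sequence alone.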
In such case, replace the aqua_vanjaram implementation * with more common helper */ reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + aqua_vanjaram_encode_ext_smn_addressing(instance); + fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr); - reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); - dev_err(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", - socket_id, aid_id, reg_data); + reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + + socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error); + aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error); + hbm_id = ((1 == AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error)) ? 0 : 1); if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", - socket_id, aid_id, hbm_id); + dev_info(adev->dev, + "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, memory training failed\n", + socket_id, aid_id, hbm_id, fw_status); if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", - socket_id, aid_id); + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, firmware load failed at boot time\n", + socket_id, aid_id, fw_status); if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", - socket_id, aid_id); + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, wafl link training failed\n", + socket_id, aid_id, fw_status); if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", - socket_id, aid_id); + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, xgmi link training failed\n", + socket_id, aid_id, fw_status); if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", - socket_id, aid_id); + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, usr cp link training failed\n", + socket_id, aid_id, fw_status); if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", - socket_id, aid_id); + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, usr dp link training failed\n", + socket_id, aid_id, fw_status); if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", - socket_id, aid_id, hbm_id); + dev_info(adev->dev, + "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm memory test failed\n", + socket_id, aid_id, hbm_id, fw_status); if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", - socket_id, aid_id, hbm_id); + dev_info(adev->dev, + "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n", + socket_id, aid_id, hbm_id, fw_status); } -static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev, - u32 instance, u32 *boot_error) +static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev, + u32 instance) { - u32 reg_addr; + u64 reg_addr; u32 reg_data; int retry_loop; @@ -4158,40 +4785,55 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev, for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { reg_data = amdgpu_device_indirect_rreg_ext(adev, 
reg_addr); - if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) { - *boot_error = AMDGPU_RAS_BOOT_SUCEESS; - return 0; - } - msleep(1); - } - - /* The pattern for smn addressing in other SOC could be different from - * the one for aqua_vanjaram. We should revisit the code if the pattern - * is changed. In such case, replace the aqua_vanjaram implementation - * with more common helper */ - reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) + - aqua_vanjaram_encode_ext_smn_addressing(instance); - - for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { - reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); - if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) { - *boot_error = reg_data; - return 0; - } - msleep(1); + if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) + return false; + else + msleep(1); } - *boot_error = reg_data; - return -ETIME; + return true; } void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances) { - u32 boot_error = 0; u32 i; for (i = 0; i < num_instances; i++) { - if (amdgpu_ras_wait_for_boot_complete(adev, i, &boot_error)) - amdgpu_ras_boot_time_error_reporting(adev, i, boot_error); + if (amdgpu_ras_boot_error_detected(adev, i)) + amdgpu_ras_boot_time_error_reporting(adev, i); } } + +int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT; + int ret = 0; + + mutex_lock(&con->page_rsv_lock); + ret = amdgpu_vram_mgr_query_page_status(mgr, start); + if (ret == -ENOENT) + ret = amdgpu_vram_mgr_reserve_range(mgr, start, AMDGPU_GPU_PAGE_SIZE); + mutex_unlock(&con->page_rsv_lock); + + return ret; +} + +void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, + const char *fmt, ...) 
+{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (RAS_EVENT_ID_IS_VALID(event_id)) + dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf); + else + dev_printk(KERN_INFO, adev->dev, "%pV", &vaf); + + va_end(args); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e0f8ce9d8440..dcf1f3dbb5c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -26,6 +26,9 @@ #include <linux/debugfs.h> #include <linux/list.h> +#include <linux/kfifo.h> +#include <linux/radix-tree.h> +#include <linux/siphash.h> #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -43,13 +46,11 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7) #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) #define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) -#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13) -#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31) +#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13) -#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000 +#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100 #define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA #define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF -#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000 #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) /* position of instance value in sub_block_index of @@ -61,9 +62,21 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29 #define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000 +/* Reserve 8 physical dram row for possible retirement. + * In worst cases, it will lose 8 * 2MB memory in vram domain */ +#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20) /* The high three bits indicates socketid */ #define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) +#define RAS_EVENT_INVALID_ID (BIT_ULL(63)) +#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63))) + +#define RAS_EVENT_LOG(adev, id, fmt, ...) 
\ + amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__) + +#define amdgpu_ras_mark_ras_event(adev, type) \ + (amdgpu_ras_mark_ras_event_caller((adev), (type), __builtin_return_address(0))) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -419,6 +432,65 @@ struct umc_ecc_info { int record_ce_addr_supported; }; +enum ras_event_type { + RAS_EVENT_TYPE_INVALID = 0, + RAS_EVENT_TYPE_FATAL, + RAS_EVENT_TYPE_POISON_CREATION, + RAS_EVENT_TYPE_POISON_CONSUMPTION, + RAS_EVENT_TYPE_COUNT, +}; + +struct ras_event_state { + u64 last_seqno; + atomic64_t count; +}; + +struct ras_event_manager { + atomic64_t seqno; + struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT]; +}; + +struct ras_event_id { + enum ras_event_type type; + u64 event_id; +}; + +struct ras_query_context { + struct ras_event_id evid; +}; + +typedef int (*pasid_notify)(struct amdgpu_device *adev, + uint16_t pasid, void *data); + +struct ras_poison_msg { + enum amdgpu_ras_block block; + uint16_t pasid; + uint32_t reset; + pasid_notify pasid_fn; + void *data; +}; + +struct ras_err_pages { + uint32_t count; + uint64_t *pfn; +}; + +struct ras_ecc_err { + u64 hash_index; + uint64_t status; + uint64_t ipid; + uint64_t addr; + struct ras_err_pages err_pages; +}; + +struct ras_ecc_log_info { + struct mutex lock; + siphash_key_t ecc_key; + struct radix_tree_root de_page_tree; + uint64_t de_queried_count; + uint64_t prev_de_queried_count; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -429,6 +501,7 @@ struct amdgpu_ras { struct device_attribute features_attr; struct device_attribute version_attr; struct device_attribute schema_attr; + struct device_attribute event_state_attr; struct bin_attribute badpages_attr; struct dentry *de_ras_eeprom_table; /* block array */ @@ -469,6 +542,7 @@ struct amdgpu_ras { bool update_channel_flag; /* Record status of smu mca debug mode */ bool is_aca_debug_mode; + bool is_rma; /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; @@ -477,8 +551,20 @@ struct amdgpu_ras { wait_queue_head_t page_retirement_wq; struct mutex page_retirement_lock; atomic_t page_retirement_req_cnt; + atomic_t poison_creation_count; + struct mutex page_rsv_lock; + DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); + struct ras_ecc_log_info umc_ecc_log; + struct delayed_work page_retirement_dwork; + /* Fatal error detected flag */ atomic_t fed; + + /* RAS event manager */ + struct ras_event_manager __event_mgr; + struct ras_event_manager *event_mgr; + + uint64_t reserved_pages_in_bytes; }; struct ras_fs_data { @@ -512,6 +598,7 @@ struct ras_err_data { unsigned long de_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; + unsigned long err_addr_len; u32 err_list_count; struct list_head err_node_list; }; @@ -879,4 +966,20 @@ void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); +int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, + const void *caller); + +int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); + +int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); + +bool amdgpu_ras_in_recovery(struct amdgpu_device *adev); + +__printf(3, 4) 
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, + const char *fmt, ...); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index b12808c0c331..aab8077e5098 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): return true; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): return (adev->gmc.is_app_apu) ? false : true; default: return false; @@ -222,6 +223,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 14): control->i2c_address = EEPROM_I2C_MADDR_4; return true; default: @@ -404,6 +406,22 @@ static int amdgpu_ras_eeprom_correct_header_tag( return res; } +static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { + case IP_VERSION(8, 10, 0): + case IP_VERSION(12, 0, 0): + hdr->version = RAS_TABLE_VER_V2_1; + return; + default: + hdr->version = RAS_TABLE_VER_V1; + return; + } +} + /** * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table * @control: pointer to control structure @@ -423,11 +441,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) mutex_lock(&control->ras_tbl_mutex); hdr->header = RAS_TABLE_HDR_VAL; - if (adev->umc.ras && - adev->umc.ras->set_eeprom_table_version) - adev->umc.ras->set_eeprom_table_version(hdr); - else - hdr->version = RAS_TABLE_VER_V1; + amdgpu_ras_set_eeprom_table_version(control); if (hdr->version == RAS_TABLE_VER_V2_1) { hdr->first_rec_offset = RAS_RECORD_START_V2_1; @@ -736,6 +750,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) control->tbl_rai.health_percent = 0; } + if (amdgpu_bad_page_threshold != -1) + ras->is_rma = true; + /* ignore the -ENOTSUPP return value */ amdgpu_dpm_send_rma_reason(adev); } @@ -994,6 +1011,9 @@ Out: uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control) { + /* get available eeprom table version first before eeprom table init */ + amdgpu_ras_set_eeprom_table_version(control); + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) return RAS_MAX_RECORD_COUNT_V2_1; else @@ -1307,8 +1327,7 @@ Out: return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res; } -int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit) +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) { struct amdgpu_device *adev = to_amdgpu_device(control); unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 }; @@ -1316,7 +1335,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); int res; - *exceed_err_limit = false; + ras->is_rma = false; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -1408,7 +1427,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1."); res = 0; } else { - *exceed_err_limit = true; + ras->is_rma = true; dev_err(adev->dev, "RAS records:%d exceed threshold:%d, " "GPU will not be initialized. 
Replace this GPU or increase the threshold", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 6dfd667f3013..b9ebda577797 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -129,8 +129,7 @@ struct eeprom_table_record { unsigned char mcumc_id; } __packed; -int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit); +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 381101d2bf05..50fcd86e1033 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) } } +/** + * amdgpu_res_cleared - check if blocks are cleared + * + * @cur: the cursor to extract the block + * + * Check if the @cur block is cleared + */ +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) +{ + struct drm_buddy_block *block; + + switch (cur->mem_type) { + case TTM_PL_VRAM: + block = cur->node; + + if (!amdgpu_vram_mgr_is_cleared(block)) + return false; + break; + default: + return false; + } + + return true; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 147100c27c2d..66c1a868c0e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,9 +21,6 @@ * */ -#include <linux/devcoredump.h> -#include <generated/utsrelease.h> - #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" @@ -36,6 +33,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): ret = aldebaran_reset_init(adev); break; case IP_VERSION(11, 0, 7): @@ -58,6 +56,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): ret = aldebaran_reset_fini(adev); break; case IP_VERSION(11, 0, 7): @@ -162,104 +161,34 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) up_write(&reset_domain->sem); } -#ifndef CONFIG_DEV_COREDUMP -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, - coredump->reset_time.tv_nsec); - - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - if (coredump->ring) { - 
drm_printf(&p, "\nRing timed out details\n"); - drm_printf(&p, "IP Type: %d Ring Name: %s\n", - coredump->ring->funcs->type, - coredump->ring->name); - } - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } - - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) +void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf, + size_t len) { - kfree(data); -} - -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - struct amdgpu_job *job = reset_context->job; - struct drm_sched_job *s_job; - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + if (!buf || !len) return; - } - - coredump->reset_vram_lost = vram_lost; - if (reset_context->job && reset_context->job->vm) { - struct amdgpu_task_info *ti; - struct amdgpu_vm *vm = reset_context->job->vm; - - ti = amdgpu_vm_get_task_info_vm(vm); - if (ti) { - coredump->reset_task_info = *ti; - amdgpu_vm_put_task_info(ti); + switch (rst_ctxt->src) { + case AMDGPU_RESET_SRC_JOB: + if (rst_ctxt->job) { + snprintf(buf, len, "job hang on ring:%s", + rst_ctxt->job->base.sched->name); + } else { + strscpy(buf, "job hang", len); } + break; + case AMDGPU_RESET_SRC_RAS: + strscpy(buf, "RAS error", len); + break; + case AMDGPU_RESET_SRC_MES: + strscpy(buf, "MES hang", len); + break; + case AMDGPU_RESET_SRC_HWS: + strscpy(buf, "HWS hang", len); + break; + case AMDGPU_RESET_SRC_USER: + strscpy(buf, "user trigger", len); + break; + default: + strscpy(buf, "unknown", len); } - - if (job) { - s_job = &job->base; - coredump->ring = to_amdgpu_ring(s_job->sched); - } - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); } -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 60522963aaca..4ae581f3fcb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -32,6 +32,17 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, + AMDGPU_SKIP_COREDUMP = 2, + AMDGPU_HOST_FLR = 3, +}; + +enum AMDGPU_RESET_SRCS { + AMDGPU_RESET_SRC_UNKNOWN, + AMDGPU_RESET_SRC_JOB, + AMDGPU_RESET_SRC_RAS, + AMDGPU_RESET_SRC_MES, + AMDGPU_RESET_SRC_HWS, + AMDGPU_RESET_SRC_USER, }; struct amdgpu_reset_context { @@ -41,6 +52,7 @@ struct amdgpu_reset_context { struct amdgpu_hive_info *hive; struct list_head *reset_device_list; unsigned long flags; + enum AMDGPU_RESET_SRCS src; }; struct amdgpu_reset_handler { @@ -88,19 +100,6 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; -#ifdef CONFIG_DEV_COREDUMP - -#define AMDGPU_COREDUMP_VERSION "1" - -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; - struct amdgpu_ring *ring; -}; -#endif - int amdgpu_reset_init(struct amdgpu_device *adev); int 
amdgpu_reset_fini(struct amdgpu_device *adev); @@ -141,8 +140,8 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context); +void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf, + size_t len); #define for_each_handler(i, handler, reset_ctl) \ for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5505d646f43a..ad49cecb20b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -352,7 +352,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->max_dw = max_dw; ring->hw_prio = hw_prio; - if (!ring->no_scheduler) { + if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM) { hw_ip = ring->funcs->type; num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds; adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] = @@ -473,8 +473,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_ring *ring = file_inode(f)->i_private; - int r, i; uint32_t value, result, early[3]; + loff_t i; + int r; if (*pos & 3 || size & 3) return -EINVAL; @@ -524,46 +525,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; volatile u32 *mqd; - int r; + u32 *kbuf; + int r, i; uint32_t value, result; if (*pos & 3 || size & 3) return -EINVAL; - result = 0; + kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - return r; + goto err_free; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } + if (r) + goto err_unreserve; + + /* + * Copy to local buffer to avoid put_user(), which might fault + * and acquire mmap_sem, under reservation_ww_class_mutex. 
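The MQD debugfs fix above follows a common pattern: snapshot the data while the lock is held, then perform the fault-prone copy-out afterwards, so put_user() can never fault under the reservation lock. A generic user-space sketch of the same snapshot-then-copy idea, with hypothetical names and a pthread mutex standing in for the reservation lock:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int mqd[8];     /* stands in for the mapped MQD */

static void dump_mqd(void)
{
        unsigned int snap[8];
        int i;

        /* snapshot while locked ... */
        pthread_mutex_lock(&lock);
        memcpy(snap, mqd, sizeof(snap));
        pthread_mutex_unlock(&lock);

        /* ... then do the fault-prone consumer work (put_user() in
         * the kernel) without holding the lock */
        for (i = 0; i < 8; i++)
                printf("0x%08x\n", snap[i]);
}

int main(void)
{
        mqd[0] = 0xdeadbeef;
        dump_mqd();
        return 0;
}

The extra copy costs one allocation, but it removes a lock-ordering hazard between the reservation lock and mmap_sem entirely instead of trying to reason about when the fault can happen.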
+ */ + for (i = 0; i < ring->mqd_size/sizeof(u32); i++) + kbuf[i] = mqd[i]; + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + result = 0; while (size) { if (*pos >= ring->mqd_size) - goto done; + break; - value = mqd[*pos/4]; + value = kbuf[*pos/4]; r = put_user(value, (uint32_t *)buf); if (r) - goto done; + goto err_free; buf += 4; result += 4; size -= 4; *pos += 4; } -done: - amdgpu_bo_kunmap(ring->mqd_obj); - mqd = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - return r; - + kfree(kbuf); return result; + +err_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +err_free: + kfree(kbuf); + return r; } static const struct file_operations amdgpu_debugfs_mqd_fops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 5a17e0ff2ab8..fce22d3f816b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -112,6 +112,53 @@ typedef enum _SOC21_FIRMWARE_ID_ { SOC21_FIRMWARE_ID_MAX = 37 } SOC21_FIRMWARE_ID; +typedef enum _SOC24_FIRMWARE_ID_ { + SOC24_FIRMWARE_ID_INVALID = 0, + SOC24_FIRMWARE_ID_RLC_G_UCODE = 1, + SOC24_FIRMWARE_ID_RLC_TOC = 2, + SOC24_FIRMWARE_ID_RLCG_SCRATCH = 3, + SOC24_FIRMWARE_ID_RLC_SRM_ARAM = 4, + SOC24_FIRMWARE_ID_RLC_P_UCODE = 5, + SOC24_FIRMWARE_ID_RLC_V_UCODE = 6, + SOC24_FIRMWARE_ID_RLX6_UCODE = 7, + SOC24_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8, + SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT = 9, + SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10, + SOC24_FIRMWARE_ID_SDMA_UCODE_TH0 = 11, + SOC24_FIRMWARE_ID_SDMA_UCODE_TH1 = 12, + SOC24_FIRMWARE_ID_CP_PFP = 13, + SOC24_FIRMWARE_ID_CP_ME = 14, + SOC24_FIRMWARE_ID_CP_MEC = 15, + SOC24_FIRMWARE_ID_RS64_MES_P0 = 16, + SOC24_FIRMWARE_ID_RS64_MES_P1 = 17, + SOC24_FIRMWARE_ID_RS64_PFP = 18, + SOC24_FIRMWARE_ID_RS64_ME = 19, + SOC24_FIRMWARE_ID_RS64_MEC = 20, + SOC24_FIRMWARE_ID_RS64_MES_P0_STACK = 21, + SOC24_FIRMWARE_ID_RS64_MES_P1_STACK = 22, + SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK = 23, + SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK = 24, + SOC24_FIRMWARE_ID_RS64_ME_P0_STACK = 25, + SOC24_FIRMWARE_ID_RS64_ME_P1_STACK = 26, + SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK = 27, + SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK = 28, + SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK = 29, + SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK = 30, + SOC24_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31, + SOC24_FIRMWARE_ID_RLCG_SCRATCH_SR = 32, + SOC24_FIRMWARE_ID_RLCP_SCRATCH_SR = 33, + SOC24_FIRMWARE_ID_RLCV_SCRATCH_SR = 34, + SOC24_FIRMWARE_ID_RLX6_DRAM_SR = 35, + SOC24_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36, + SOC24_FIRMWARE_ID_RLCDEBUGLOG = 37, + SOC24_FIRMWARE_ID_SRIOV_DEBUG = 38, + SOC24_FIRMWARE_ID_SRIOV_CSA_RLC = 39, + SOC24_FIRMWARE_ID_SRIOV_CSA_SDMA = 40, + SOC24_FIRMWARE_ID_SRIOV_CSA_CP = 41, + SOC24_FIRMWARE_ID_UMF_ZONE_PAD = 42, + SOC24_FIRMWARE_ID_MAX = 43 +} SOC24_FIRMWARE_ID; + typedef struct _RLC_TABLE_OF_CONTENT { union { unsigned int DW0; @@ -155,6 +202,33 @@ typedef struct _RLC_TABLE_OF_CONTENT { }; } RLC_TABLE_OF_CONTENT; +typedef struct _RLC_TABLE_OF_CONTENT_V2 { + union { + unsigned int DW0; + struct { + uint32_t offset : 25; + uint32_t id : 7; + }; + }; + + union { + unsigned int DW1; + struct { + uint32_t reserved0 : 1; + uint32_t reserved1 : 1; + uint32_t reserved2 : 1; + uint32_t memory_destination : 2; + uint32_t vfflr_image_code : 4; + uint32_t reserved9 : 1; + uint32_t reserved10 : 1; + uint32_t reserved11 : 1; + uint32_t size_x16 : 1; + uint32_t reserved13 : 1; + uint32_t size : 18; + }; + }; +} RLC_TABLE_OF_CONTENT_V2; + #define RLC_TOC_MAX_SIZE 64 struct amdgpu_rlc_funcs { 
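DW0 of the new v2 table-of-contents entry packs a 25-bit offset and a 7-bit firmware ID into one 32-bit word. A quick C sketch of that packing, assuming the little-endian bitfield layout the header implies (bitfield ordering is compiler- and ABI-specific, so this is illustrative only):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch of the DW0 layout from RLC_TABLE_OF_CONTENT_V2: a 25-bit
 * offset and a 7-bit firmware ID share one 32-bit word. */
union toc_dw0 {
        uint32_t dw0;
        struct {
                uint32_t offset : 25;
                uint32_t id     : 7;
        };
};

int main(void)
{
        union toc_dw0 e = { 0 };

        e.offset = 0x1000;      /* entry offset within the image */
        e.id = 5;               /* e.g. SOC24_FIRMWARE_ID_RLC_P_UCODE */

        assert(sizeof(e) == sizeof(uint32_t));
        printf("raw DW0 = 0x%08x\n", e.dw0);
        return 0;
}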
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 1d9d187de6ee..183a976ba29d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -158,6 +158,7 @@ static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) const struct common_firmware_header *header = NULL; const struct sdma_firmware_header_v1_0 *hdr; const struct sdma_firmware_header_v2_0 *hdr_v2; + const struct sdma_firmware_header_v3_0 *hdr_v3; header = (const struct common_firmware_header *) sdma_inst->fw->data; @@ -174,6 +175,11 @@ static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version); sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version); break; + case 3: + hdr_v3 = (const struct sdma_firmware_header_v3_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr_v3->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr_v3->ucode_feature_version); + break; default: return -EINVAL; } @@ -206,16 +212,17 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, const struct common_firmware_header *header = NULL; int err, i; const struct sdma_firmware_header_v2_0 *sdma_hdr; + const struct sdma_firmware_header_v3_0 *sdma_hv3; uint16_t version_major; char ucode_prefix[30]; - char fw_name[52]; amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix)); if (instance == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + "amdgpu/%s.bin", ucode_prefix); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%d.bin", ucode_prefix, instance); - err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + "amdgpu/%s%d.bin", ucode_prefix, instance); if (err) goto out; @@ -251,11 +258,12 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, else { /* Use a single copy per SDMA firmware type. 
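The instance-context setup in this file dispatches on version_major from the common firmware header, with the new v3 layout carrying its own feature-version field. A trimmed-down sketch of that dispatch, with simplified stand-in structs rather than the real header layouts:

#include <stdint.h>
#include <stdio.h>

/* Trimmed-down stand-ins for the firmware header layouts. */
struct common_hdr {
        uint16_t version_major;
        uint16_t version_minor;
};

struct sdma_hdr_v3 {
        struct common_hdr header;
        uint32_t ucode_feature_version;
};

/* Dispatch on the major version, as the instance-context setup does
 * for the v1/v2/v3 header layouts. */
static int parse_sdma_fw(const void *data, uint32_t *feature)
{
        const struct common_hdr *h = data;

        switch (h->version_major) {
        case 3: {
                const struct sdma_hdr_v3 *v3 = data;

                *feature = v3->ucode_feature_version;
                return 0;
        }
        default:
                return -1;      /* -EINVAL in the driver */
        }
}

int main(void)
{
        struct sdma_hdr_v3 fw = {
                .header = { .version_major = 3 },
                .ucode_feature_version = 42,
        };
        uint32_t feature;

        if (!parse_sdma_fw(&fw, &feature))
                printf("feature version %u\n", feature);
        return 0;
}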
PSP uses the same instance for all * groups of SDMAs */ - if (amdgpu_ip_version(adev, SDMA0_HWIP, - 0) == - IP_VERSION(4, 4, 2) && + if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 5)) && adev->firmware.load_type == - AMDGPU_FW_LOAD_PSP && + AMDGPU_FW_LOAD_PSP && adev->sdma.num_inst_per_aid == i) { break; } @@ -281,6 +289,15 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, adev->firmware.fw_size += ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE); break; + case 3: + sdma_hv3 = (const struct sdma_firmware_header_v3_0 *) + adev->sdma.instance[0].fw->data; + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_RS64]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA_RS64; + info->fw = adev->sdma.instance[0].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(sdma_hv3->ucode_size_bytes), PAGE_SIZE); + break; default: err = -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 173a2a308078..d3706a484870 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -60,6 +60,10 @@ struct amdgpu_sdma_instance { struct amdgpu_ring page; bool burst_nop; uint32_t aid_id; + + struct amdgpu_bo *sdma_fw_obj; + uint64_t sdma_fw_gpu_addr; + uint32_t *sdma_fw_ptr; }; enum amdgpu_sdma_ras_memory_id { @@ -132,7 +136,7 @@ struct amdgpu_buffer_funcs { uint64_t dst_offset, /* number of byte to transfer */ uint32_t byte_count, - bool tmz); + uint32_t copy_flags); /* maximum bytes in a single operation */ uint32_t fill_max_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c index 8ed0e073656f..41ebe690eeff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c @@ -135,6 +135,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u mutex_unlock(&psp->securedisplay_context.mutex); break; case 2: + if (size < 3 || phy_id >= TA_SECUREDISPLAY_MAX_PHY) { + dev_err(adev->dev, "Invalid input: %s\n", str); + return -EINVAL; + } mutex_lock(&psp->securedisplay_context.mutex); psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index ff4435181055..ec9d12f85f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -44,6 +44,7 @@ struct amdgpu_smuio_funcs { u32 (*get_socket_id)(struct amdgpu_device *adev); enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); + u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev); }; struct amdgpu_smuio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f539b1d00234..383fce40d4dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -178,10 +178,10 @@ TRACE_EVENT(amdgpu_cs_ioctl, TP_fast_assign( __entry->sched_job_id = job->base.id; - __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)); + __assign_str(timeline); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring, to_amdgpu_ring(job->base.sched)->name); + __assign_str(ring); __entry->num_ibs = job->num_ibs; ), 
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -203,10 +203,10 @@ TRACE_EVENT(amdgpu_sched_run_job, TP_fast_assign( __entry->sched_job_id = job->base.id; - __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)); + __assign_str(timeline); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring, to_amdgpu_ring(job->base.sched)->name); + __assign_str(ring); __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -231,7 +231,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_fast_assign( __entry->pasid = vm->pasid; - __assign_str(ring, ring->name); + __assign_str(ring); __entry->vmid = job->vmid; __entry->vm_hub = ring->vm_hub, __entry->pd_addr = job->vm_pd_addr; @@ -425,7 +425,7 @@ TRACE_EVENT(amdgpu_vm_flush, ), TP_fast_assign( - __assign_str(ring, ring->name); + __assign_str(ring); __entry->vmid = vmid; __entry->vm_hub = ring->vm_hub; __entry->pd_addr = pd_addr; @@ -526,7 +526,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, ), TP_fast_assign( - __assign_str(ring, sched_job->base.sched->name); + __assign_str(ring); __entry->id = sched_job->base.id; __entry->fence = fence; __entry->ctx = fence->context; @@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps, __entry->value) ); -TRACE_EVENT(amdgpu_runpm_reference_dumps, - TP_PROTO(uint32_t index, const char *func), - TP_ARGS(index, func), - TP_STRUCT__entry( - __field(uint32_t, index) - __string(func, func) - ), - TP_fast_assign( - __entry->index = index; - __assign_str(func, func); - ), - TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n", - __entry->index, - __get_str(func)) -); #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fc418e670fda..b8bc7fa8c375 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && - amdgpu_bo_in_cpu_visible_vram(abo)) { + amdgpu_res_cpu_visible(adev, bo->resource)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -236,7 +236,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes, false); + dst_addr, num_bytes, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -295,6 +295,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_res_cursor src_mm, dst_mm; struct dma_fence *fence = NULL; int r = 0; + uint32_t copy_flags = 0; + struct amdgpu_bo *abo_src, *abo_dst; if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); @@ -306,7 +308,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { - uint64_t from, to, cur_size; + uint64_t from, to, cur_size, tiling_flags; + uint32_t num_type, data_format, max_com; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -323,8 +326,27 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (r) goto error; - r = 
amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true, tmz); + abo_src = ttm_to_amdgpu_bo(src->bo); + abo_dst = ttm_to_amdgpu_bo(dst->bo); + if (tmz) + copy_flags |= AMDGPU_COPY_FLAGS_TMZ; + if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) && + (abo_src->tbo.resource->mem_type == TTM_PL_VRAM)) + copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED; + if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) && + (dst->mem->mem_type == TTM_PL_VRAM)) { + copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED; + amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags); + max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); + num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); + data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | + AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format)); + } + + r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, + &next, false, true, copy_flags); if (r) goto error; @@ -378,11 +400,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, - false); + r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, + false); if (r) { goto error; } else if (wipe_fence) { + amdgpu_vram_mgr_set_cleared(bo->resource); dma_fence_put(fence); fence = wipe_fence; } @@ -403,40 +426,55 @@ error: return r; } -/* - * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy +/** + * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU + * @adev: amdgpu device + * @res: the resource to check * - * Called by amdgpu_bo_move() + * Returns: true if the full resource is CPU visible, false otherwise. 
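The rewritten visibility check walks every piece of the resource and requires each one to end at or below the CPU-visible aperture; a single out-of-aperture piece makes the whole resource count as invisible. A sketch of the same walk over a plain span array, with illustrative types in place of the driver's resource cursor:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct span { uint64_t start, size; };

/* Every VRAM piece must end at or below the CPU-visible aperture,
 * otherwise the whole resource is considered inaccessible. */
static bool all_visible(const struct span *s, int n, uint64_t visible)
{
        for (int i = 0; i < n; i++)
                if (s[i].start + s[i].size > visible)
                        return false;
        return true;
}

int main(void)
{
        /* second piece crosses a 256 MiB aperture, so this prints 0 */
        struct span res[] = {
                { 0,           1ULL << 20 },
                { 255ULL << 20, 2ULL << 20 },
        };

        printf("visible: %d\n", all_visible(res, 2, 256ULL << 20));
        return 0;
}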
*/ -static bool amdgpu_mem_visible(struct amdgpu_device *adev, - struct ttm_resource *mem) +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res) { - u64 mem_size = (u64)mem->size; struct amdgpu_res_cursor cursor; - u64 end; - if (mem->mem_type == TTM_PL_SYSTEM || - mem->mem_type == TTM_PL_TT) + if (!res) + return false; + + if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || + res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL) return true; - if (mem->mem_type != TTM_PL_VRAM) + + if (res->mem_type != TTM_PL_VRAM) return false; - amdgpu_res_first(mem, 0, mem_size, &cursor); - end = cursor.start + cursor.size; + amdgpu_res_first(res, 0, res->size, &cursor); while (cursor.remaining) { + if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size) + return false; amdgpu_res_next(&cursor, cursor.size); + } - if (!cursor.remaining) - break; + return true; +} - /* ttm_resource_ioremap only supports contiguous memory */ - if (end != cursor.start) - return false; +/* + * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy + * + * Called by amdgpu_bo_move() + */ +static bool amdgpu_res_copyable(struct amdgpu_device *adev, + struct ttm_resource *mem) +{ + if (!amdgpu_res_cpu_visible(adev, mem)) + return false; - end = cursor.start + cursor.size; - } + /* ttm_resource_ioremap only supports contiguous memory */ + if (mem->mem_type == TTM_PL_VRAM && + !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)) + return false; - return end <= adev->gmc.visible_vram_size; + return true; } /* @@ -466,14 +504,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == TTM_PL_SYSTEM && (new_mem->mem_type == TTM_PL_TT || new_mem->mem_type == AMDGPU_PL_PREEMPT)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if ((old_mem->mem_type == TTM_PL_TT || old_mem->mem_type == AMDGPU_PL_PREEMPT) && @@ -483,9 +523,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == AMDGPU_PL_GDS || @@ -497,8 +538,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, new_mem->mem_type == AMDGPU_PL_OA || new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (bo->type == ttm_bo_type_device && @@ -510,27 +552,28 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; } - if (adev->mman.buffer_funcs_enabled) { - if (((old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_VRAM) || - (old_mem->mem_type == TTM_PL_VRAM && - new_mem->mem_type == TTM_PL_SYSTEM))) { - hop->fpfn = 0; - hop->lpfn = 0; - hop->mem_type = TTM_PL_TT; - hop->flags = TTM_PL_FLAG_TEMPORARY; - return -EMULTIHOP; - } + if (adev->mman.buffer_funcs_enabled && + ((old_mem->mem_type == TTM_PL_SYSTEM && + new_mem->mem_type == TTM_PL_VRAM) || + (old_mem->mem_type == TTM_PL_VRAM && + new_mem->mem_type == TTM_PL_SYSTEM))) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = 
TTM_PL_TT; + hop->flags = TTM_PL_FLAG_TEMPORARY; + return -EMULTIHOP; + } + amdgpu_bo_move_notify(bo, evict, new_mem); + if (adev->mman.buffer_funcs_enabled) r = amdgpu_move_blit(bo, evict, new_mem, old_mem); - } else { + else r = -ENODEV; - } if (r) { /* Check that all memory is CPU accessible */ - if (!amdgpu_mem_visible(adev, old_mem) || - !amdgpu_mem_visible(adev, new_mem)) { + if (!amdgpu_res_copyable(adev, old_mem) || + !amdgpu_res_copyable(adev, new_mem)) { pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } @@ -540,11 +583,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); -out: - /* update statistics */ + /* update statistics after the move */ + if (evict) + atomic64_inc(&adev->num_evictions); atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict); return 0; } @@ -557,7 +599,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - size_t bus_size = (size_t)mem->size; switch (mem->mem_type) { case TTM_PL_SYSTEM: @@ -568,9 +609,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, break; case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; - /* check if it's visible */ - if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) - return -EINVAL; if (adev->mman.aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) @@ -824,8 +862,7 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); uint64_t page_idx, pages_per_xcc; int i; - uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | - AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); + uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC); pages_per_xcc = total_pages; do_div(pages_per_xcc, num_xcc); @@ -1380,7 +1417,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, */ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, f) { - if (amdkfd_fence_check_mm(f, current->mm)) + if (amdkfd_fence_check_mm(f, current->mm) && + !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) return false; } @@ -1477,7 +1515,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, swap(src_addr, dst_addr); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, - PAGE_SIZE, false); + PAGE_SIZE, 0); amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -1548,7 +1586,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false); + amdgpu_bo_move_notify(bo, false, NULL); } static struct ttm_device_funcs amdgpu_bo_driver = { @@ -1721,7 +1759,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); if (!adev->bios && - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))) reserve_size = max(reserve_size, (uint32_t)280 << 20); else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; @@ -2128,7 +2167,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - 
bool vm_needs_flush, bool tmz) + bool vm_needs_flush, uint32_t copy_flags) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2154,8 +2193,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes, tmz); - + dst_offset, cur_size_in_bytes, copy_flags); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; byte_count -= cur_size_in_bytes; @@ -2215,6 +2253,71 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, return 0; } +/** + * amdgpu_ttm_clear_buffer - clear memory buffers + * @bo: amdgpu buffer object + * @resv: reservation object + * @fence: dma_fence associated with the operation + * + * Clear the memory buffer resource. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_res_cursor cursor; + u64 addr; + int r; + + if (!adev->mman.buffer_funcs_enabled) + return -EINVAL; + + if (!fence) + return -EINVAL; + + *fence = dma_fence_get_stub(); + + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); + + mutex_lock(&adev->mman.gtt_window_lock); + while (cursor.remaining) { + struct dma_fence *next = NULL; + u64 size; + + if (amdgpu_res_cleared(&cursor)) { + amdgpu_res_next(&cursor, cursor.size); + continue; + } + + /* Never clear more than 256MiB at once to avoid timeouts */ + size = min(cursor.size, 256ULL << 20); + + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, + 1, ring, false, &size, &addr); + if (r) + goto err; + + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, + &next, true, true); + if (r) + goto err; + + dma_fence_put(*fence); + *fence = next; + + amdgpu_res_next(&cursor, size); + } +err: + mutex_unlock(&adev->mman.gtt_window_lock); + + return r; +} + int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 65ec82141a8e..138d80017f35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,8 +38,6 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 -#define AMDGPU_POISON 0xd0bed0be - extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; @@ -111,6 +109,21 @@ struct amdgpu_copy_mem { unsigned long offset; }; +#define AMDGPU_COPY_FLAGS_TMZ (1 << 0) +#define AMDGPU_COPY_FLAGS_READ_DECOMPRESSED (1 << 1) +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED (1 << 2) +#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT 3 +#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK 0x03 +#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07 +#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8 +#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f + +#define AMDGPU_COPY_FLAGS_SET(field, value) \ + (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT) +#define AMDGPU_COPY_FLAGS_GET(value, field) \ + (((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK) + int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t 
gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); int amdgpu_preempt_mgr_init(struct amdgpu_device *adev); @@ -139,22 +152,27 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res); + int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); - int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush, bool tmz); + bool vm_needs_flush, uint32_t copy_flags); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, const struct amdgpu_copy_mem *src, const struct amdgpu_copy_mem *dst, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 0867fd9e15ba..4c7b53648a50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -28,6 +28,8 @@ #include "amdgpu.h" #include "amdgpu_ucode.h" +#define AMDGPU_UCODE_NAME_MAX (128) + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); @@ -323,6 +325,12 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr) DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset)); DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset)); DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size)); + } else if (version_major == 3) { + const struct sdma_firmware_header_v3_0 *sdma_hdr = + container_of(hdr, struct sdma_firmware_header_v3_0, header); + + DRM_DEBUG("ucode_reversion: %u\n", + le32_to_cpu(sdma_hdr->ucode_feature_version)); } else { DRM_ERROR("Unknown SDMA ucode version: %u.%u\n", version_major, version_minor); @@ -682,6 +690,32 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "UMSCH_MM_CMD_BUFFER"; case AMDGPU_UCODE_ID_JPEG_RAM: return "JPEG"; + case AMDGPU_UCODE_ID_SDMA_RS64: + return "RS64_SDMA"; + case AMDGPU_UCODE_ID_CP_RS64_PFP: + return "RS64_PFP"; + case AMDGPU_UCODE_ID_CP_RS64_ME: + return "RS64_ME"; + case AMDGPU_UCODE_ID_CP_RS64_MEC: + return "RS64_MEC"; + case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK: + return "RS64_PFP_P0_STACK"; + case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK: + return "RS64_PFP_P1_STACK"; + case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK: + return "RS64_ME_P0_STACK"; + case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK: + return "RS64_ME_P1_STACK"; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK: + return "RS64_MEC_P0_STACK"; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK: + return "RS64_MEC_P1_STACK"; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK: + return "RS64_MEC_P2_STACK"; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: + return "RS64_MEC_P3_STACK"; + case AMDGPU_UCODE_ID_ISP: + return "ISP"; default: return "UNKNOWN UCODE"; } @@ -791,6 +825,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL; const 
struct mes_firmware_header_v1_0 *mes_hdr = NULL; const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL; + const struct sdma_firmware_header_v3_0 *sdmav3_hdr = NULL; const struct imu_firmware_header_v1_0 *imu_hdr = NULL; const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL; const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL; @@ -812,6 +847,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data; mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data; sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data; + sdmav3_hdr = (const struct sdma_firmware_header_v3_0 *)ucode->fw->data; imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data; vpe_hdr = (const struct vpe_firmware_header_v1_0 *)ucode->fw->data; umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data; @@ -828,6 +864,11 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode_addr = (u8 *)ucode->fw->data + le32_to_cpu(sdma_hdr->ctl_ucode_offset); break; + case AMDGPU_UCODE_ID_SDMA_RS64: + ucode->ucode_size = le32_to_cpu(sdmav3_hdr->ucode_size_bytes); + ucode_addr = (u8 *)ucode->fw->data + + le32_to_cpu(sdmav3_hdr->header.ucode_array_offset_bytes); + break; case AMDGPU_UCODE_ID_CP_MEC1: case AMDGPU_UCODE_ID_CP_MEC2: ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) - @@ -1374,6 +1415,9 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, case VPE_HWIP: ip_name = "vpe"; break; + case ISP_HWIP: + ip_name = "isp"; + break; default: BUG(); } @@ -1390,28 +1434,40 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, * * @adev: amdgpu device * @fw: pointer to load firmware to - * @fw_name: firmware to load + * @fmt: firmware name format string + * @...: variable arguments * * This is a helper that will use request_firmware and amdgpu_ucode_validate * to load and run basic validation on firmware. If the load fails, remap * the error code to -ENODEV, so that early_init functions will fail to load. */ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fw_name) + const char *fmt, ...) 
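/*
 * Usage sketch (illustrative only, not part of this patch; it mirrors the
 * amdgpu_vcn.c conversion later in this series) -- with the printf-style
 * interface a caller no longer needs a local fw_name buffer:
 *
 *	r = amdgpu_ucode_request(adev, &adev->vcn.fw[i],
 *				 "amdgpu/%s_%d.bin", ucode_prefix, i);
 */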
{ - int err = request_firmware(fw, fw_name, adev->dev); + char fname[AMDGPU_UCODE_NAME_MAX]; + va_list ap; + int r; + + va_start(ap, fmt); + r = vsnprintf(fname, sizeof(fname), fmt, ap); + va_end(ap); + if (r >= sizeof(fname)) { + dev_warn(adev->dev, "amdgpu firmware name buffer overflow\n"); + return -EOVERFLOW; + } - if (err) + r = request_firmware(fw, fname, adev->dev); + if (r) return -ENODEV; - err = amdgpu_ucode_validate(*fw); - if (err) { - dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name); + r = amdgpu_ucode_validate(*fw); + if (r) { + dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname); release_firmware(*fw); *fw = NULL; } - return err; + return r; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 619445760037..5bc37acd3981 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -125,6 +125,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, + PSP_FW_TYPE_PSP_IPKEYMGR_DRV, PSP_FW_TYPE_MAX_INDEX, }; @@ -345,6 +346,14 @@ struct umsch_mm_firmware_header_v1_0 { uint32_t umsch_mm_data_start_addr_hi; }; +/* version_major=3, version_minor=0 */ +struct sdma_firmware_header_v3_0 { + struct common_firmware_header header; + uint32_t ucode_feature_version; + uint32_t ucode_offset_bytes; + uint32_t ucode_size_bytes; +}; + /* gpu info payload */ struct gpu_info_firmware_v1_0 { uint32_t gc_num_se; @@ -430,6 +439,7 @@ union amdgpu_firmware_header { struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; struct sdma_firmware_header_v2_0 sdma_v2_0; + struct sdma_firmware_header_v3_0 sdma_v3_0; struct gpu_info_firmware_header_v1_0 gpu_info; struct dmcu_firmware_header_v1_0 dmcu; struct dmcub_firmware_header_v1_0 dmcub; @@ -454,6 +464,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_SDMA7, AMDGPU_UCODE_ID_SDMA_UCODE_TH0, AMDGPU_UCODE_ID_SDMA_UCODE_TH1, + AMDGPU_UCODE_ID_SDMA_RS64, AMDGPU_UCODE_ID_CP_CE, AMDGPU_UCODE_ID_CP_PFP, AMDGPU_UCODE_ID_CP_ME, @@ -512,6 +523,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER, AMDGPU_UCODE_ID_P2S_TABLE, AMDGPU_UCODE_ID_JPEG_RAM, + AMDGPU_UCODE_ID_ISP, AMDGPU_UCODE_ID_MAXIMUM, }; @@ -582,8 +594,9 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); +__printf(3, 4) int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fw_name); + const char *fmt, ...); void amdgpu_ucode_release(const struct firmware **fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 20436f81856a..2f84bdb8c594 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -21,10 +21,13 @@ * */ +#include <linux/sort.h> #include "amdgpu.h" #include "umc_v6_7.h" #define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms +#define MAX_UMC_HASH_STRING_SIZE 256 + static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) @@ -63,6 +66,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, goto out_fini_err_data;
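/*
 * Sketch (not part of the patch): err_addr_len is what lets
 * amdgpu_umc_fill_error_record() bounds-check the record array, so the
 * code that allocates err_addr is expected to publish the length too:
 *
 *	err_data.err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query,
 *				    sizeof(struct eeprom_table_record),
 *				    GFP_KERNEL);
 *	if (err_data.err_addr)
 *		err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
 *
 * (the kcalloc() form here is an assumption for illustration).
 */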
} + err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query; + /* * Translate UMC channel address to Physical address */ @@ -86,7 +91,7 @@ out_fini_err_data: return ret; } -static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, +void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -118,6 +123,8 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if(!err_data->err_addr) dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); + else + err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; /* umc query_ras_error_address is also responsible for clearing * error status @@ -143,6 +150,8 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if(!err_data->err_addr) dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); + else + err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; /* umc query_ras_error_address is also responsible for clearing * error status @@ -170,6 +179,7 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, } kfree(err_data->err_addr); + err_data->err_addr = NULL; mutex_unlock(&con->page_retirement_lock); } @@ -177,7 +187,7 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry, - bool reset) + uint32_t reset) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -185,10 +195,9 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); amdgpu_umc_handle_bad_pages(adev, ras_error_status); - if (err_data->ue_count && reset) { - /* use mode-2 reset for poison consumption */ - if (!entry) - con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; + if ((err_data->ue_count || err_data->de_count) && + (reset || (con && con->is_rma))) { + con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } @@ -196,13 +205,14 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - bool reset, uint32_t timeout_ms) + uint32_t reset, uint32_t timeout_ms) { struct ras_err_data err_data; struct ras_common_if head = { .block = AMDGPU_RAS_BLOCK__UMC, }; struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint32_t timeout = timeout_ms; memset(&err_data, 0, sizeof(err_data)); @@ -235,19 +245,17 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (reset) { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - - /* use mode-2 reset for poison consumption */ - con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; + if (reset || (err_data.err_addr_cnt && con && con->is_rma)) { + con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } return 0; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - enum amdgpu_ras_block block, bool reset) +int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) { int ret = AMDGPU_RAS_SUCCESS; @@ -285,14 +293,13 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, amdgpu_ras_error_data_fini(&err_data); } else { 
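/*
 * Note on the bool -> uint32_t conversion in this hunk (illustrative):
 * "reset" now carries an AMDGPU_RAS_GPU_RESET_MODE* flag which is OR'ed
 * into con->gpu_reset_flags, so a caller selects the reset mode directly
 * instead of passing true/false, e.g.:
 *
 *	amdgpu_umc_poison_handler(adev, AMDGPU_RAS_BLOCK__UMC,
 *				  AMDGPU_RAS_GPU_RESET_MODE1_RESET);
 */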
- if (reset) { - amdgpu_umc_bad_page_polling_timeout(adev, - reset, MAX_UMC_POISON_POLLING_TIME_SYNC); - } else { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret; + ret = amdgpu_ras_put_poison_req(adev, + block, pasid, pasid_fn, data, reset); + if (!ret) { atomic_inc(&con->page_retirement_req_cnt); - wake_up(&con->page_retirement_wq); } } @@ -307,11 +314,19 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, return ret; } +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset) +{ + return amdgpu_umc_pasid_poison_handler(adev, + block, 0, NULL, NULL, reset); +} + int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) { - return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); + return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, + AMDGPU_RAS_GPU_RESET_MODE1_RESET); } int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev) @@ -388,14 +403,20 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, return 0; } -void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, +int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, uint32_t channel_index, uint32_t umc_inst) { - struct eeprom_table_record *err_rec = - &err_data->err_addr[err_data->err_addr_cnt]; + struct eeprom_table_record *err_rec; + + if (!err_data || + !err_data->err_addr || + (err_data->err_addr_cnt >= err_data->err_addr_len)) + return -EINVAL; + + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; err_rec->address = err_addr; /* page frame address is saved */ @@ -407,6 +428,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, err_rec->mcumc_id = umc_inst; err_data->err_addr_cnt++; + + return 0; } int amdgpu_umc_loop_channels(struct amdgpu_device *adev, @@ -439,3 +462,76 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, return 0; } + +int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr) +{ + if (adev->umc.ras->update_ecc_status) + return adev->umc.ras->update_ecc_status(adev, + status, ipid, addr); + return 0; +} + +static int amdgpu_umc_uint64_cmp(const void *a, const void *b) +{ + uint64_t *addr_a = (uint64_t *)a; + uint64_t *addr_b = (uint64_t *)b; + + if (*addr_a > *addr_b) + return 1; + else if (*addr_a < *addr_b) + return -1; + else + return 0; +} + +/* Use string hash to avoid logging the same bad pages repeatedly */ +int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, + uint64_t *pfns, int len, uint64_t *val) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + char buf[MAX_UMC_HASH_STRING_SIZE] = {0}; + int offset = 0, i = 0; + uint64_t hash_val; + + if (!pfns || !len) + return -EINVAL; + + sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL); + + for (i = 0; i < len; i++) + offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]); + + hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key); + + *val = hash_val; + + return 0; +} + +int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, + struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_ecc_log_info *ecc_log; + int ret; + + ecc_log = &con->umc_ecc_log; + + mutex_lock(&ecc_log->lock); + ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err); + if (!ret) { + 
struct ras_err_pages *err_pages = &ecc_err->err_pages; + int i; + + /* Reserve memory */ + for (i = 0; i < err_pages->count; i++) + amdgpu_ras_reserve_page(adev, err_pages->pfn[i]); + + radix_tree_tag_set(ecc_tree, + ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG); + } + mutex_unlock(&ecc_log->lock); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 26d2ae498daf..5f50c69c3cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -52,6 +52,8 @@ #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) +/* Page retirement tag */ +#define UMC_ECC_NEW_DETECTED_TAG 0x1 typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -66,8 +68,8 @@ struct amdgpu_umc_ras { void *ras_error_status); bool (*check_ecc_err_status)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, void *ras_error_status); - /* support different eeprom table version for different asic */ - void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr); + int (*update_ecc_status)(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr); }; struct amdgpu_umc_funcs { @@ -103,11 +105,14 @@ struct amdgpu_umc { int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - enum amdgpu_ras_block block, bool reset); + enum amdgpu_ras_block block, uint32_t reset); +int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, +int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, uint32_t channel_index, @@ -123,5 +128,15 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - bool reset, uint32_t timeout_ms); + uint32_t reset, uint32_t timeout_ms); + +int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr); +int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, + uint64_t *pfns, int len, uint64_t *val); +int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, + struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); + +void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, + void *ras_error_status); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ab820cf52668..fbc2852278e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -23,7 +23,10 @@ */ #include <linux/firmware.h> +#include <linux/module.h> +#include <linux/debugfs.h> #include <drm/drm_exec.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_umsch_mm.h" @@ -189,10 +192,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + 
qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +293,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -571,6 +580,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -736,6 +746,17 @@ static int umsch_mm_init(struct amdgpu_device *adev) return r; } + r = amdgpu_bo_create_kernel(adev, AMDGPU_UMSCHFW_LOG_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->umsch_mm.dbglog_bo, + &adev->umsch_mm.log_gpu_addr, + &adev->umsch_mm.log_cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate umsch debug bo\n", r); + return r; + } + mutex_init(&adev->umsch_mm.mutex_hidden); umsch_mm_agdb_index_init(adev); @@ -750,6 +771,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: @@ -766,6 +788,9 @@ static int umsch_mm_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) + return 0; + return umsch_mm_test(adev); } @@ -778,6 +803,7 @@ static int umsch_mm_sw_init(void *handle) if (r) return r; + amdgpu_umsch_fwlog_init(&adev->umsch_mm); r = umsch_mm_ring_init(&adev->umsch_mm); if (r) return r; @@ -804,6 +830,10 @@ static int umsch_mm_sw_fini(void *handle) &adev->umsch_mm.cmd_buf_gpu_addr, (void **)&adev->umsch_mm.cmd_buf_ptr); + amdgpu_bo_free_kernel(&adev->umsch_mm.dbglog_bo, + &adev->umsch_mm.log_gpu_addr, + (void **)&adev->umsch_mm.log_cpu_addr); + amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index); return 0; @@ -857,6 +887,106 @@ static int umsch_mm_resume(void *handle) return umsch_mm_hw_init(adev); } +void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm) +{ +#if defined(CONFIG_DEBUG_FS) + void *fw_log_cpu_addr = umsch_mm->log_cpu_addr; + volatile struct amdgpu_umsch_fwlog *log_buf = fw_log_cpu_addr; + + log_buf->header_size = sizeof(struct amdgpu_umsch_fwlog); + log_buf->buffer_size = AMDGPU_UMSCHFW_LOG_SIZE; + log_buf->rptr = log_buf->header_size; + log_buf->wptr = log_buf->header_size; + log_buf->wrapped = 0; +#endif +} + +/* + * debugfs for mapping umsch firmware log buffer. 
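+ * Layout (as set up by amdgpu_umsch_fwlog_init() above): a struct
+ * amdgpu_umsch_fwlog header holding rptr/wptr/wrapped sits at offset 0 and
+ * the log payload follows, with both pointers starting at header_size.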
+ */ +#if defined(CONFIG_DEBUG_FS) +static ssize_t amdgpu_debugfs_umsch_fwlog_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_umsch_mm *umsch_mm; + void *log_buf; + volatile struct amdgpu_umsch_fwlog *plog; + unsigned int read_pos, write_pos, available, i, read_bytes = 0; + unsigned int read_num[2] = {0}; + + umsch_mm = file_inode(f)->i_private; + if (!umsch_mm) + return -ENODEV; + + if (!umsch_mm->log_cpu_addr) + return -EFAULT; + + log_buf = umsch_mm->log_cpu_addr; + + plog = (volatile struct amdgpu_umsch_fwlog *)log_buf; + read_pos = plog->rptr; + write_pos = plog->wptr; + + if (read_pos > AMDGPU_UMSCHFW_LOG_SIZE || write_pos > AMDGPU_UMSCHFW_LOG_SIZE) + return -EFAULT; + + if (!size || (read_pos == write_pos)) + return 0; + + if (write_pos > read_pos) { + available = write_pos - read_pos; + read_num[0] = min_t(size_t, size, available); + } else { + read_num[0] = AMDGPU_UMSCHFW_LOG_SIZE - read_pos; + available = read_num[0] + write_pos - plog->header_size; + if (size > available) + read_num[1] = write_pos - plog->header_size; + else if (size > read_num[0]) + read_num[1] = size - read_num[0]; + else + read_num[0] = size; + } + + for (i = 0; i < 2; i++) { + if (read_num[i]) { + if (read_pos == AMDGPU_UMSCHFW_LOG_SIZE) + read_pos = plog->header_size; + if (read_num[i] == copy_to_user((buf + read_bytes), + (log_buf + read_pos), read_num[i])) + return -EFAULT; + + read_bytes += read_num[i]; + read_pos += read_num[i]; + } + } + + plog->rptr = read_pos; + *pos += read_bytes; + return read_bytes; +} + +static const struct file_operations amdgpu_debugfs_umschfwlog_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_umsch_fwlog_read, + .llseek = default_llseek +}; +#endif + +void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev, + struct amdgpu_umsch_mm *umsch_mm) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + sprintf(name, "amdgpu_umsch_fwlog"); + debugfs_create_file_size(name, S_IFREG | 0444, root, umsch_mm, + &amdgpu_debugfs_umschfwlog_fops, + AMDGPU_UMSCHFW_LOG_SIZE); +#endif +} + static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .name = "umsch_mm_v4_0", .early_init = umsch_mm_early_init, @@ -867,6 +997,8 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .hw_fini = umsch_mm_hw_fini, .suspend = umsch_mm_suspend, .resume = umsch_mm_resume, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a6236..2c771a753778 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,18 +44,28 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t 
use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; }; +struct amdgpu_umsch_fwlog { + uint32_t rptr; + uint32_t wptr; + uint32_t buffer_size; + uint32_t header_size; + uint32_t wrapped; +}; + struct umsch_mm_add_queue_input { uint32_t process_id; uint64_t page_table_base_addr; @@ -78,15 +81,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +100,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +110,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; @@ -166,6 +174,11 @@ struct amdgpu_umsch_mm { uint32_t agdb_index[CONTEXT_PRIORITY_NUM_LEVELS]; struct mutex mutex_hidden; + struct amdgpu_bo *dbglog_bo; + void *log_cpu_addr; + uint64_t log_gpu_addr; + uint32_t mem_size; + uint32_t log_offset; }; int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws); @@ -179,6 +192,11 @@ int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch); int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch); +void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev, + struct amdgpu_umsch_mm *umsch); + +void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm); + #define WREG32_SOC15_UMSCH(reg, value) \ do { \ uint32_t reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg; \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 59acf424a078..968ca2c84ef7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -743,7 +743,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t created = 0; uint32_t allocated = 0; uint32_t tmp, handle = 0; - uint32_t *size = &tmp; + uint32_t dummy = 0xffffffff; + uint32_t *size = &dummy; unsigned int idx; int i, r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 9c514a606a2f..43f44cc201cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -93,19 +93,15 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_early_init(struct amdgpu_device *adev) { - char ucode_prefix[30]; - char fw_name[40]; + char ucode_prefix[25]; int r, i; + amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); - if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6) && - i == 1) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%d.bin", ucode_prefix, i); - } - - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], fw_name); + if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6)) + r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s_%d.bin", ucode_prefix, i); + else + r = 
amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s.bin", ucode_prefix); if (r) { amdgpu_ucode_release(&adev->vcn.fw[i]); return r; @@ -151,6 +147,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } } + /* from vcn4 and above, only unified queue is used */ + adev->vcn.using_unified_queue = + amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); + hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); @@ -185,7 +185,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) { + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)); + log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log); + } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) { fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)); log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log); } else { @@ -276,18 +279,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } -/* from vcn4 and above, only unified queue is used */ -static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - bool ret = false; - - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) - ret = true; - - return ret; -} - bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; @@ -398,7 +389,9 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) for (i = 0; i < adev->vcn.num_enc_rings; ++i) fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (fence[j] || @@ -444,7 +437,9 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { @@ -470,8 +465,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && + !adev->vcn.using_unified_queue) atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); atomic_dec(&ring->adev->vcn.total_submission_cnt); @@ -725,12 +724,11 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_job *job; struct amdgpu_ib *ib; uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); - bool sq = amdgpu_vcn_using_unified_queue(ring); uint32_t *ib_checksum; uint32_t ib_pack_in_dw; int i, r; - if (sq) + if 
(adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -743,7 +741,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw = 0; /* single queue headers */ - if (sq) { + if (adev->vcn.using_unified_queue) { ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + 4 + 2; /* engine info + decoding ib in dw */ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); @@ -762,7 +760,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); r = amdgpu_job_submit_direct(job, ring, &f); @@ -852,15 +850,15 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -874,7 +872,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -882,7 +880,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = upper_32_bits(addr); ib->ptr[ib->length_dw++] = addr; - ib->ptr[ib->length_dw++] = 0x0000000b; + ib->ptr[ib->length_dw++] = 0x00000000; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ @@ -896,7 +894,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -919,15 +917,15 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -941,7 +939,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -949,7 +947,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han ib->ptr[ib->length_dw++] = handle; ib->ptr[ib->length_dw++] = upper_32_bits(addr); ib->ptr[ib->length_dw++] = addr; - ib->ptr[ib->length_dw++] = 0x0000000b; + ib->ptr[ib->length_dw++] = 0x00000000; ib->ptr[ib->length_dw++] = 0x00000014; ib->ptr[ib->length_dw++] = 0x00000002; @@ -963,7 +961,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; 
++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -1075,7 +1073,6 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) IP_VERSION(4, 0, 3)) break; } - dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a418393d89ec..1a5439abd1a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -329,6 +329,7 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; + bool using_unified_queue; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -454,6 +455,16 @@ struct amdgpu_vcn_rb_metadata { uint8_t pad[26]; }; +struct amdgpu_vcn5_fw_shared { + uint32_t present_flag_0; + uint8_t pad[12]; + struct amdgpu_fw_shared_unified_queue_struct sq; + uint8_t pad1[8]; + struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_rb_setup rb_setup; + uint8_t pad2[4]; +}; + #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7a4eae36778a..111c380f929b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -32,6 +32,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -85,8 +86,10 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init) if (virt->ops && virt->ops->req_full_gpu) { r = virt->ops->req_full_gpu(adev, init); - if (r) + if (r) { + adev->no_hw_access = true; return r; + } adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; } @@ -152,6 +155,20 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev) } /** + * amdgpu_virt_ready_to_reset() - send ready to reset to host + * @adev: amdgpu device. + * Send ready to reset message to GPU hypervisor to signal we have stopped GPU + * activity and are ready for host FLR + */ +void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->ready_to_reset) + virt->ops->ready_to_reset(adev); +} + +/** * amdgpu_virt_wait_reset() - wait for reset gpu completed * @adev: amdgpu device. * Wait for GPU reset completed. @@ -214,6 +231,22 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) adev->virt.mm_table.gpu_addr = 0; } +/** + * amdgpu_virt_rcvd_ras_interrupt() - receive ras interrupt + * @adev: amdgpu device.
+ * Check whether host sent RAS error message + * Return: true if found, otherwise false + */ +bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (!virt->ops || !virt->ops->rcvd_ras_intr) + return false; + + return virt->ops->rcvd_ras_intr(adev); +} + unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, @@ -394,6 +427,8 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev, else vram_usage_va = adev->mman.drv_vram_usage_va; + memset(&bp, 0, sizeof(bp)); + if (bp_block_size) { bp_cnt = bp_block_size / sizeof(uint64_t); for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) { @@ -424,7 +459,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) return -EINVAL; if (pf2vf_info->size > 1024) { - DRM_ERROR("invalid pf2vf message size\n"); + dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size); return -EINVAL; } @@ -435,7 +470,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, adev->virt.fw_reserve.checksum_key, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -449,7 +486,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, 0, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -485,7 +524,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid; break; default: - DRM_ERROR("invalid pf2vf version\n"); + dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version); return -EINVAL; } @@ -571,9 +610,14 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->decode_usage = 0; vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr; + vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr; + + if (adev->mes.resource_1) { + vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size; + } vf2pf_info->checksum = amd_sriov_msg_checksum( - vf2pf_info, vf2pf_info->header.size, 0, 0); + vf2pf_info, sizeof(*vf2pf_info), 0, 0); return 0; } @@ -584,8 +628,25 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) int ret; ret = amdgpu_virt_read_pf2vf_data(adev); - if (ret) + if (ret) { + adev->virt.vf2pf_update_retry_cnt++; + + if ((amdgpu_virt_rcvd_ras_interrupt(adev) || + adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) && + amdgpu_sriov_runtime(adev)) { + + amdgpu_ras_set_fed(adev, true); + if (amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->kfd.reset_work)) + return; + else + dev_err(adev->dev, "Failed to queue work! 
at %s", __func__); + } + goto out; + } + + adev->virt.vf2pf_update_retry_cnt = 0; amdgpu_virt_write_vf2pf_data(adev); out: @@ -606,6 +667,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; + adev->virt.vf2pf_update_retry_cnt = 0; if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); @@ -705,12 +767,6 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } - if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) - /* VF MMIO access (except mailbox range) from CPU - * will be blocked during sriov runtime - */ - adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; - /* we have the ability to check now */ if (amdgpu_sriov_vf(adev)) { switch (adev->asic_type) { @@ -801,6 +857,8 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) */ adev->gfx.is_poweron = false; } + + adev->mes.ring.sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) @@ -956,11 +1014,17 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f return 0; } + if (amdgpu_device_skip_hw_access(adev)) + return 0; + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id]; scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0; scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1; scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2; scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3; + + mutex_lock(&adev->virt.rlcg_reg_lock); + if (reg_access_ctrl->spare_int) spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int; @@ -1017,6 +1081,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f } ret = readl(scratch_reg0); + + mutex_unlock(&adev->virt.rlcg_reg_lock); + return ret; } @@ -1026,6 +1093,9 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev, { u32 rlcg_flag; + if (amdgpu_device_skip_hw_access(adev)) + return; + if (!amdgpu_sriov_runtime(adev) && amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) { amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id); @@ -1043,6 +1113,9 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, { u32 rlcg_flag; + if (amdgpu_device_skip_hw_access(adev)) + return 0; + if (!amdgpu_sriov_runtime(adev) && amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag)) return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 3f59b7b5523f..b42a8854dca0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -52,6 +52,8 @@ /* tonga/fiji use this offset */ #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 +#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2 + enum amdgpu_sriov_vf_mode { SRIOV_VF_MODE_BARE_METAL = 0, SRIOV_VF_MODE_ONE_VF, @@ -86,11 +88,13 @@ struct amdgpu_virt_ops { int (*rel_full_gpu)(struct amdgpu_device *adev, bool init); int (*req_init_data)(struct amdgpu_device *adev); int (*reset_gpu)(struct amdgpu_device *adev); + void (*ready_to_reset)(struct amdgpu_device *adev); int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, enum 
idh_request req, u32 data1, u32 data2, u32 data3); void (*ras_poison_handler)(struct amdgpu_device *adev, enum amdgpu_ras_block block); + bool (*rcvd_ras_intr)(struct amdgpu_device *adev); }; /* @@ -130,6 +134,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), /* VCN RB decouple */ AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), + /* MES info */ + AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -257,6 +263,7 @@ struct amdgpu_virt { /* vf2pf message */ struct delayed_work vf2pf_work; uint32_t vf2pf_update_interval_ms; + int vf2pf_update_retry_cnt; /* multimedia bandwidth config */ bool is_mm_bw_enabled; @@ -267,6 +274,8 @@ struct amdgpu_virt { /* the ucode id to signal the autoload */ uint32_t autoload_ucode_id; + + struct mutex rlcg_reg_lock; }; struct amdgpu_video_codec_info; @@ -332,15 +341,19 @@ static inline bool is_virtual_machine(void) ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) #define amdgpu_sriov_is_vcn_rb_decouple(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) +#define amdgpu_sriov_is_mes_info_enable(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); void amdgpu_virt_request_init_data(struct amdgpu_device *adev); +void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev); int amdgpu_virt_wait_reset(struct amdgpu_device *adev); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); +bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev); void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); void amdgpu_virt_exchange_data(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 8baa2e0935cc..6415d0d039e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -3,6 +3,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> #include <drm/drm_simple_kms_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_vblank.h> #include "amdgpu.h" @@ -65,9 +66,7 @@ static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer) static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc) { - struct drm_device *dev = crtc->dev; - unsigned int pipe = drm_crtc_index(crtc); - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); @@ -91,10 +90,8 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, ktime_t *vblank_time, bool in_vblank_irq) { - struct drm_device *dev = crtc->dev; - unsigned int pipe = crtc->index; struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc); - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); if (!READ_ONCE(vblank->enabled)) { @@ -314,7 +311,13 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, 
return 0; } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); @@ -368,12 +371,19 @@ static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { struct amdgpu_bo *rbo; + struct drm_gem_object *obj; int r; if (!old_state->fb) return; - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + obj = drm_gem_fb_get_obj(old_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return; + } + + rbo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { DRM_ERROR("failed to reserve rbo before unpin\n"); @@ -658,6 +668,8 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { .soft_reset = amdgpu_vkms_soft_reset, .set_clockgating_state = amdgpu_vkms_set_clockgating_state, .set_powergating_state = amdgpu_vkms_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4299ce386322..a060c28f0877 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -333,7 +333,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; - if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) return; dma_resv_assert_held(vm->root.bo->tbo.base.resv); @@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!vm) return result; - result += vm->generation; + result += lower_32_bits(vm->generation); /* Add one if the page tables will be re-generated on next CS */ if (drm_sched_entity_error(&vm->delayed)) ++result; @@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { + uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; - if (drm_sched_entity_error(&vm->delayed)) { - ++vm->generation; + if (vm->generation != new_vm_generation) { + vm->generation = new_vm_generation; amdgpu_vm_bo_reset_state_machine(vm); amdgpu_vm_fini_entities(vm); r = amdgpu_vm_init_entities(adev, vm); @@ -886,6 +887,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, } /** + * amdgpu_vm_tlb_flush - prepare TLB flush + * + * @params: parameters for update + * @fence: input fence to sync TLB flush with + * @tlb_cb: the callback structure + * + * Increments the tlb sequence to make sure that future CS execute a VM flush. 
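+ * For locked updates on compute VMs a dedicated TLB-flush fence is created
+ * as well and added to the root PD reservation with BOOKKEEP usage, so no
+ * PD/PT can be freed before the flush has actually executed.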
+ */ +static void +amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, + struct dma_fence **fence, + struct amdgpu_vm_tlb_seq_struct *tlb_cb) +{ + struct amdgpu_vm *vm = params->vm; + + if (!fence || !*fence) + return; + + tlb_cb->vm = vm; + if (!dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + + /* Prepare a TLB flush fence to be attached to PTs */ + if (!params->unlocked && vm->is_compute_context) { + amdgpu_vm_tlb_fence_create(params->adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + +/** * amdgpu_vm_update_range - update a range in the vm page table * * @adev: amdgpu_device pointer to use for commands @@ -916,8 +955,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { - struct amdgpu_vm_update_params params; struct amdgpu_vm_tlb_seq_struct *tlb_cb; + struct amdgpu_vm_update_params params; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -927,8 +966,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); if (!tlb_cb) { - r = -ENOMEM; - goto error_unlock; + drm_dev_exit(idx); + return -ENOMEM; } /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, @@ -948,7 +987,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.needs_flush = flush_tlb; params.allow_override = allow_override; + INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -1015,7 +1056,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.pages_addr = NULL; } - } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { + } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) { addr = vram_base + cursor.start; } else { addr = 0; @@ -1031,24 +1072,18 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = vm->update_funcs->commit(¶ms, fence); + if (r) + goto error_free; - if (flush_tlb || params.table_freed) { - tlb_cb->vm = vm; - if (fence && *fence && - !dma_fence_add_callback(*fence, &tlb_cb->cb, - amdgpu_vm_tlb_seq_cb)) { - dma_fence_put(vm->last_tlb_flush); - vm->last_tlb_flush = dma_fence_get(*fence); - } else { - amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); - } + if (params.needs_flush) { + amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb); tlb_cb = NULL; } + amdgpu_vm_pt_free_list(adev, ¶ms); + error_free: kfree(tlb_cb); - -error_unlock: amdgpu_vm_eviction_unlock(vm); drm_dev_exit(idx); return r; @@ -1067,13 +1102,13 @@ static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va, * For now ignore BOs which are currently locked and potentially * changing their location. 
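 * Per-VM BOs share the root PD reservation and are therefore skipped by
 * the trylock below (see amdgpu_vm_is_bo_always_valid()); only
 * independently reserved BOs need to be locked here.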
*/ - if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv && + if (!amdgpu_vm_is_bo_always_valid(vm, bo) && !dma_resv_trylock(bo->tbo.base.resv)) return; amdgpu_bo_get_memory(bo, stats); - if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) - dma_resv_unlock(bo->tbo.base.resv); + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + dma_resv_unlock(bo->tbo.base.resv); } void amdgpu_vm_get_memory(struct amdgpu_vm *vm, @@ -1169,8 +1204,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uncached = false; } - if (clear || (bo && bo->tbo.base.resv == - vm->root.bo->tbo.base.resv)) + if (clear || amdgpu_vm_is_bo_always_valid(vm, bo)) last_update = &vm->last_update; else last_update = &bo_va->last_pt_update; @@ -1212,7 +1246,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, * the evicted list so that it gets validated again on the * next command submission. */ - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { + if (amdgpu_vm_is_bo_always_valid(vm, bo)) { uint32_t mem_type = bo->tbo.resource->mem_type; if (!(bo->preferred_domains & @@ -1335,7 +1369,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, struct dma_fence *fence) { - if (mapping->flags & AMDGPU_PTE_PRT) + if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_add_prt_cb(adev, fence); kfree(mapping); } @@ -1603,16 +1637,46 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->invalids); amdgpu_vm_it_insert(mapping, &vm->va); - if (mapping->flags & AMDGPU_PTE_PRT) + if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && - !bo_va->base.moved) { + if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved) amdgpu_vm_bo_moved(&bo_va->base); - } + trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1639,21 +1703,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / 
AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1706,17 +1763,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1730,7 +1779,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1817,10 +1866,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); @@ -1885,10 +1938,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo *bo = before->bo_va->base.bo; amdgpu_vm_it_insert(before, &vm->va); - if (before->flags & AMDGPU_PTE_PRT) + if (before->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + if (amdgpu_vm_is_bo_always_valid(vm, bo) && !before->bo_va->base.moved) amdgpu_vm_bo_moved(&before->bo_va->base); } else { @@ -1900,10 +1953,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo *bo = after->bo_va->base.bo; amdgpu_vm_it_insert(after, &vm->va); - if (after->flags & AMDGPU_PTE_PRT) + if (after->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + if (amdgpu_vm_is_bo_always_valid(vm, bo) && !after->bo_va->base.moved) amdgpu_vm_bo_moved(&after->bo_va->base); } else { @@ -1983,7 +2036,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, if (bo) { dma_resv_assert_held(bo->tbo.base.resv); - if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) + if (amdgpu_vm_is_bo_always_valid(vm, bo)) ttm_bo_set_bulk_move(&bo->tbo, NULL); for (base = &bo_va->base.bo->vm_bo; *base; @@ -2077,7 +2130,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { + if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) { amdgpu_vm_bo_evicted(bo_base); continue; } @@ -2088,7 +2141,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(bo_base); - else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) + else if (amdgpu_vm_is_bo_always_valid(vm, bo)) amdgpu_vm_bo_moved(bo_base); else amdgpu_vm_bo_invalidated(bo_base); @@ -2387,10 +2440,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = dma_fence_get_stub(); 
vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); - vm->generation = 0; + vm->generation = amdgpu_vm_generation(adev, NULL); mutex_init(&vm->eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, false, &root, xcp_id); @@ -2550,7 +2604,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { + if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) { amdgpu_vm_prt_fini(adev, vm); prt_fini_needed = false; } @@ -2924,6 +2978,14 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, if (vm && status) { vm->fault_info.addr = addr; vm->fault_info.status = status; + /* + * Update the fault information globally for later usage + * when vm could be stale or freed. + */ + adev->vm_manager.fault_info.addr = addr; + adev->vm_manager.fault_info.vmhub = vmhub; + adev->vm_manager.fault_info.status = status; + if (AMDGPU_IS_GFXHUB(vmhub)) { vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; vm->fault_info.vmhub |= @@ -2943,3 +3005,16 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); } +/** + * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid + * + * @vm: VM to test against. + * @bo: BO to be tested. + * + * Returns true if the BO shares the dma_resv object with the root PD and is + * always guaranteed to be valid inside the VM. + */ +bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) +{ + return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 047ec1930d12..046949c4b695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -94,8 +94,11 @@ struct amdgpu_mem_stats; #define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \ AMDGPU_PTE_PRT) /* For GFX9 */ -#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57) -#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL) +#define AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype) ((uint64_t)(mtype) << 57) +#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10_SHIFT(3ULL) +#define AMDGPU_PTE_MTYPE_VG10(flags, mtype) \ + (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_VG10_MASK)) | \ + AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype)) #define AMDGPU_MTYPE_NC 0 #define AMDGPU_MTYPE_CC 2 @@ -108,8 +111,34 @@ struct amdgpu_mem_stats; | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) /* gfx10 */ -#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) -#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) +#define AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype) ((uint64_t)(mtype) << 48) +#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10_SHIFT(7ULL) +#define AMDGPU_PTE_MTYPE_NV10(flags, mtype) \ + (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_NV10_MASK)) | \ + AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype)) + +/* gfx12 */ +#define AMDGPU_PTE_PRT_GFX12 (1ULL << 56) +#define AMDGPU_PTE_PRT_FLAG(adev) \ + ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? 
AMDGPU_PTE_PRT_GFX12 : AMDGPU_PTE_PRT) + +#define AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype) ((uint64_t)(mtype) << 54) +#define AMDGPU_PTE_MTYPE_GFX12_MASK AMDGPU_PTE_MTYPE_GFX12_SHIFT(3ULL) +#define AMDGPU_PTE_MTYPE_GFX12(flags, mtype) \ + (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_GFX12_MASK)) | \ + AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype)) + +#define AMDGPU_PTE_DCC (1ULL << 58) +#define AMDGPU_PTE_IS_PTE (1ULL << 63) + +/* PDE Block Fragment Size for gfx v12 */ +#define AMDGPU_PDE_BFS_GFX12(a) ((uint64_t)((a) & 0x1fULL) << 58) +#define AMDGPU_PDE_BFS_FLAG(adev, a) \ + ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_BFS_GFX12(a) : AMDGPU_PDE_BFS(a)) +/* PDE is handled as PTE for gfx v12 */ +#define AMDGPU_PDE_PTE_GFX12 (1ULL << 63) +#define AMDGPU_PDE_PTE_FLAG(adev) \ + ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_PTE_GFX12 : AMDGPU_PDE_PTE) /* How to program VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 @@ -257,15 +286,20 @@ struct amdgpu_vm_update_params { unsigned int num_dw_left; /** - * @table_freed: return true if page table is freed when updating + * @needs_flush: true whenever we need to invalidate the TLB */ - bool table_freed; + bool needs_flush; /** * @allow_override: true for memory that is not uncached: allows MTYPE * to be overridden for NUMA local memory. */ bool allow_override; + + /** + * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush + */ + struct list_head tlb_flush_waitlist; }; struct amdgpu_vm_update_funcs { @@ -342,6 +376,7 @@ struct amdgpu_vm { atomic64_t tlb_seq; struct dma_fence *last_tlb_flush; atomic64_t kfd_last_flushed_seq; + uint64_t tlb_fence_context; /* How many times we had to re-generate the page tables */ uint64_t generation; @@ -422,6 +457,8 @@ struct amdgpu_vm_manager { * look up VM of a page fault */ struct xarray pasids; + /* Global registration of recent page fault information */ + struct amdgpu_vm_fault_info fault_info; }; struct amdgpu_bo_va_mapping; @@ -544,6 +581,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags); void amdgpu_vm_pt_free_work(struct work_struct *work); +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params); #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); @@ -551,6 +590,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm); +bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo); + /** * amdgpu_vm_tlb_seq - return tlb flush sequence number * @vm: the amdgpu_vm structure to query @@ -609,5 +650,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, uint64_t addr, uint32_t status, unsigned int vmhub); +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct dma_fence **fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 6e31621452de..3895bd7d176a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { - /* Flush HDP */ + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + mb(); amdgpu_device_flush_hdp(p->adev, 
NULL); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 124389a6bf48..e39d6e7643bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -413,7 +413,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (adev->asic_type >= CHIP_VEGA10) { if (level != AMDGPU_VM_PTB) { /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; + flags |= AMDGPU_PDE_PTE_FLAG(adev); amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); } else { @@ -622,40 +622,58 @@ void amdgpu_vm_pt_free_work(struct work_struct *work) } /** - * amdgpu_vm_pt_free_dfs - free PD/PT levels + * amdgpu_vm_pt_free_list - free PD/PT levels * * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * @unlocked: vm resv unlock status + * @params: see amdgpu_vm_update_params definition * - * Free the page directory or page table level and all sub levels. + * Free the page directory objects saved in the flush list */ -static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - bool unlocked) +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params) { - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; + struct amdgpu_vm_bo_base *entry, *next; + struct amdgpu_vm *vm = params->vm; + bool unlocked = params->unlocked; + + if (list_empty(¶ms->tlb_flush_waitlist)) + return; if (unlocked) { spin_lock(&vm->status_lock); - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - list_move(&entry->vm_status, &vm->pt_freed); - - if (start) - list_move(&start->entry->vm_status, &vm->pt_freed); + list_splice_init(¶ms->tlb_flush_waitlist, &vm->pt_freed); spin_unlock(&vm->status_lock); schedule_work(&vm->pt_free_work); return; } - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status) amdgpu_vm_pt_free(entry); +} - if (start) - amdgpu_vm_pt_free(start->entry); +/** + * amdgpu_vm_pt_add_list - add PD/PT level to the flush list + * + * @params: parameters for the update + * @cursor: first PT entry to start DF search from, non NULL + * + * This list will be freed after TLB flush. 
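 *
 * Illustrative call order (a sketch pieced together from the declarations
 * added to amdgpu_vm.h above, not lines taken from this patch): freed page
 * tables are parked on @tlb_flush_waitlist and only released once the TLB
 * flush fence exists, roughly:
 *
 *   INIT_LIST_HEAD(&params.tlb_flush_waitlist);
 *   r = amdgpu_vm_ptes_update(&params, start, end, dst, flags);
 *   if (!r && params.needs_flush)
 *           amdgpu_vm_tlb_fence_create(adev, vm, &fence);
 *   amdgpu_vm_pt_free_list(adev, &params);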
+ */ +static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_pt_cursor *cursor) +{ + struct amdgpu_vm_pt_cursor seek; + struct amdgpu_vm_bo_base *entry; + + spin_lock(¶ms->vm->status_lock); + for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { + if (entry && entry->bo) + list_move(&entry->vm_status, ¶ms->tlb_flush_waitlist); + } + + /* enter start node now */ + list_move(&cursor->entry->vm_status, ¶ms->tlb_flush_waitlist); + spin_unlock(¶ms->vm->status_lock); } /** @@ -667,7 +685,13 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, */ void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { + if (entry) + amdgpu_vm_pt_free(entry); + } } /** @@ -682,11 +706,15 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, struct amdgpu_vm_bo_base *entry) { struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); - struct amdgpu_bo *bo = parent->bo, *pbo; + struct amdgpu_bo *bo, *pbo; struct amdgpu_vm *vm = params->vm; uint64_t pde, pt, flags; unsigned int level; + if (WARN_ON(!parent)) + return -EINVAL; + + bo = parent->bo; for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; @@ -733,12 +761,12 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, struct amdgpu_device *adev = params->adev; if (level != AMDGPU_VM_PTB) { - flags |= AMDGPU_PDE_PTE; + flags |= AMDGPU_PDE_PTE_FLAG(params->adev); amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags); } else if (adev->asic_type >= CHIP_VEGA10 && !(flags & AMDGPU_PTE_VALID) && - !(flags & AMDGPU_PTE_PRT)) { + !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) { /* Workaround for fault priority problem on GMC9 */ flags |= AMDGPU_PTE_EXECUTABLE; @@ -972,10 +1000,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, while (cursor.pfn < frag_start) { /* Make sure previous mapping is freed */ if (cursor.entry->bo) { - params->table_freed = true; - amdgpu_vm_pt_free_dfs(adev, params->vm, - &cursor, - params->unlocked); + params->needs_flush = true; + amdgpu_vm_pt_add_list(params, &cursor); } amdgpu_vm_pt_next(adev, &cursor); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 349416e176a1..9b748d7058b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -102,6 +102,11 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, if (!r) r = amdgpu_sync_push_to_job(&sync, p->job); amdgpu_sync_free(&sync); + + if (r) { + p->num_dw_left = 0; + amdgpu_job_free(p->job); + } return r; } @@ -126,6 +131,10 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); + + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + WARN_ON(ib->length_dw > p->num_dw_left); f = amdgpu_job_submit(p->job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c new file mode 100644 index 000000000000..51cddfa3f1e8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/dma-fence.h> +#include <linux/workqueue.h> + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_gmc.h" + +struct amdgpu_tlb_fence { + struct dma_fence base; + struct amdgpu_device *adev; + struct dma_fence *dependency; + struct work_struct work; + spinlock_t lock; + uint16_t pasid; + +}; + +static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu tlb fence"; +} + +static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f) +{ + return "amdgpu tlb timeline"; +} + +static void amdgpu_tlb_fence_work(struct work_struct *work) +{ + struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work); + int r; + + if (f->dependency) { + dma_fence_wait(f->dependency, false); + dma_fence_put(f->dependency); + f->dependency = NULL; + } + + r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0); + if (r) { + dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n", + f->pasid); + dma_fence_set_error(&f->base, r); + } + + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +static const struct dma_fence_ops amdgpu_tlb_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_tlb_fence_get_driver_name, + .get_timeline_name = amdgpu_tlb_fence_get_timeline_name +}; + +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct dma_fence **fence) +{ + struct amdgpu_tlb_fence *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) { + /* + * We can't fail since the PDEs and PTEs are already updated, so + * just block for the dependency and execute the TLB flush + */ + if (*fence) + dma_fence_wait(*fence, false); + + amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0); + *fence = dma_fence_get_stub(); + return; + } + + f->adev = adev; + f->dependency = *fence; + f->pasid = vm->pasid; + INIT_WORK(&f->work, amdgpu_tlb_fence_work); + spin_lock_init(&f->lock); + + dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock, + vm->tlb_fence_context, atomic64_read(&vm->tlb_seq)); + + /* TODO: We probably need a separate wq here */ + dma_fence_get(&f->base); + schedule_work(&f->work); + + *fence = &f->base; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 7a65a2b128ec..5acd20ff5979 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -128,6 +128,7 @@ int amdgpu_vpe_configure_dpm(struct 
amdgpu_vpe *vpe) struct dpm_clock *VPEClks; struct dpm_clock *SOCClks; uint32_t idx; + uint32_t vpeclk_enabled_num = 0; uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0; uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0; @@ -144,6 +145,14 @@ int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe) SOCClks = clock_table.SocClocks; VPEClks = clock_table.VPEClocks; + /* Confirm enabled vpe clk num + * Enabled VPE clocks are ordered from low to high in VPEClks + * The highest valid clock index + 1 is the number of enabled VPE clocks + */ + for (idx = PP_SMU_NUM_VPECLK_DPM_LEVELS; idx && !vpeclk_enabled_num; idx--) + if (VPEClks[idx-1].Freq) + vpeclk_enabled_num = idx; + /* vpe dpm only cares 4 levels. */ for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) { uint32_t soc_dpm_level; @@ -155,8 +164,8 @@ int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe) soc_dpm_level = (idx * 2) + 1; /* clamp the max level */ - if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1) - soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1; + if (soc_dpm_level > vpeclk_enabled_num - 1) + soc_dpm_level = vpeclk_enabled_num - 1; min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ? SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq; @@ -205,7 +214,7 @@ disable_dpm: dpm_ctl &= 0xfffffffe; /* Disable DPM */ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); - return 0; + return -EINVAL; } int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) @@ -223,13 +232,11 @@ int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; const struct vpe_firmware_header_v1_0 *vpe_hdr; - char fw_prefix[32], fw_name[64]; + char fw_prefix[32]; int ret; amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", fw_prefix); - - ret = amdgpu_ucode_request(adev, &adev->vpe.fw, fw_name); + ret = amdgpu_ucode_request(adev, &adev->vpe.fw, "amdgpu/%s.bin", fw_prefix); if (ret) goto out; @@ -295,6 +302,7 @@ static int vpe_early_init(void *handle) switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 3): vpe_v6_1_set_funcs(vpe); break; case IP_VERSION(6, 1, 1): @@ -396,6 +404,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 8db880244324..f91cc149d06c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -31,6 +31,8 @@ #include "amdgpu_atomfirmware.h" #include "atom.h" +#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30) + struct amdgpu_vram_reservation { u64 start; u64 size; @@ -450,6 +452,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; @@ -468,7 +471,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) { pages_per_block = ~0ul; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -477,7 +480,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* default to 2MB */ pages_per_block = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_block = max_t(uint32_t, pages_per_block, + pages_per_block = max_t(u32, pages_per_block, tbo->page_alignment); } @@ -498,9 +501,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; + if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; @@ -514,21 +520,29 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, else min_block_size = mgr->default_page_size; - BUG_ON(min_block_size < mm->chunk_size); - - /* Limit maximum size to 2GiB due to SG table limitations */ - size = min(remaining_size, 2ULL << 30); - + size = remaining_size; if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; + BUG_ON(min_block_size < mm->chunk_size); + r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, min_block_size, &vres->blocks, vres->flags); + + if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul && + !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) { + vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION; + pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT), + tbo->page_alignment); + + continue; + } + if (unlikely(r)) goto error_free_blocks; @@ -571,7 +585,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -604,7 +618,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, vres->flags); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); @@ -646,7 +660,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; - amdgpu_res_next(&cursor, cursor.size); + amdgpu_res_next(&cursor, min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE)); } r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL); @@ -666,7 +680,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { phys_addr_t phys = cursor.start + adev->gmc.aper_base; - size_t size = cursor.size; + unsigned long size = min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE); dma_addr_t addr; addr = dma_map_resource(dev, phys, size, dir, @@ -679,7 +693,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg_dma_address(sg) = addr; sg_dma_len(sg) = size; - amdgpu_res_next(&cursor, cursor.size); + amdgpu_res_next(&cursor, size); } return 0; @@ -912,7 +926,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) 
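/*
 * For context on the drm_buddy_free_list() calls in this file: the function
 * has grown a third flags argument. A sketch of the intended use, assuming
 * DRM buddy's clear-tracking semantics: passing the resource flags back lets
 * blocks whose DRM_BUDDY_CLEARED state is known return to the allocator
 * still marked as cleared, while 0 simply discards that information:
 *
 *   drm_buddy_free_list(mm, &vres->blocks, vres->flags);  // keep clear state
 *   drm_buddy_free_list(mm, &vres->blocks, 0);            // drop clear state
 */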
kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->allocated); + drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0); kfree(rsv); } if (!adev->gmc.is_app_apu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 0e04e42cf809..b256cbc2bc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -53,10 +53,20 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) return (u64)PAGE_SIZE << drm_buddy_block_order(block); } +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) +{ + return drm_buddy_block_is_clear(block); +} + static inline struct amdgpu_vram_mgr_resource * to_amdgpu_vram_mgr_resource(struct ttm_resource *res) { return container_of(res, struct amdgpu_vram_mgr_resource, base); } +static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) +{ + to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 2b99eed5ba19..a6d456ec6aeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -219,7 +219,8 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) { int mode; - if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + if (!amdgpu_sriov_vf(xcp_mgr->adev) && + xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) return xcp_mgr->mode; if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) @@ -228,6 +229,12 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) if (!(flags & AMDGPU_XCP_FL_LOCKED)) mutex_lock(&xcp_mgr->xcp_lock); mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); + + /* First time query for VF, set the mode here */ + if (amdgpu_sriov_vf(xcp_mgr->adev) && + xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + xcp_mgr->mode = mode; + if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode) dev_WARN( xcp_mgr->adev->dev, @@ -282,8 +289,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, { struct amdgpu_xcp_mgr *xcp_mgr; - if (!xcp_funcs || !xcp_funcs->switch_partition_mode || - !xcp_funcs->get_ip_details) + if (!xcp_funcs || !xcp_funcs->get_ip_details) return -EINVAL; xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 20d51f6c9bb8..821ba2309dec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -434,6 +434,9 @@ static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev, } } + if (i == top->num_nodes) + return -EINVAL; + for (i = 0; i < top->num_nodes; i++) { for (j = 0; j < top->nodes[i].num_links; j++) /* node id in sysfs starts from 1 rather than 0 so +1 here */ @@ -1035,15 +1038,16 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } -static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, - struct aca_bank_report *report, void *data) +static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; const char *error_str; - u64 status; + u64 status, count; int ret, ext_error_code; - ret = aca_bank_info_decode(bank, &report->info); + ret 
= aca_bank_info_decode(bank, &info); if (ret) return ret; @@ -1055,15 +1059,28 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc if (error_str) dev_info(adev->dev, "%s detected\n", error_str); - if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) || - (type == ACA_ERROR_TYPE_CE && ext_error_code == 6)) - report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); - return 0; + switch (type) { + case ACA_SMU_TYPE_UE: + if (ext_error_code != 0 && ext_error_code != 9) + count = 0ULL; + + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); + break; + case ACA_SMU_TYPE_CE: + count = ext_error_code == 6 ? count : 0ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); + break; + default: + return -EINVAL; + } + + return ret; } static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { - .aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report, + .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser, }; static const struct aca_info xgmi_v6_4_0_aca_info = { @@ -1429,7 +1446,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW); if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); @@ -1438,7 +1455,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, if (amdgpu_ras_intr_triggered()) return ret2; - ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT); if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 1592c63b3099..a3bfc16de6d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -44,6 +44,7 @@ struct amdgpu_hive_info { struct amdgpu_reset_domain *reset_domain; atomic_t ras_recovery; + struct ras_event_manager event_mgr; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 51a14f6d93bd..fb2b394bb9c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -94,7 +94,8 @@ union amd_sriov_msg_feature_flags { uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; uint32_t vcn_rb_decouple : 1; - uint32_t reserved : 24; + uint32_t mes_info_enable : 1; + uint32_t reserved : 23; } flags; uint32_t all; }; @@ -157,7 +158,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48) +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -208,6 +209,8 @@ struct amd_sriov_msg_pf2vf_info { struct amd_sriov_msg_uuid_info uuid_info; /* PCIE atomic ops support flag */ uint32_t pcie_atomic_ops_support_flags; + /* Portion of GPU memory occupied by VF. 
MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */ + uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; }; @@ -221,7 +224,7 @@ struct amd_sriov_msg_vf2pf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70) +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73) struct amd_sriov_msg_vf2pf_info { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; @@ -265,7 +268,9 @@ struct amd_sriov_msg_vf2pf_info { uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; uint64_t dummy_page_addr; - + /* FB allocated for guest MES to record UQ info */ + uint64_t mes_info_addr; + uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index d6f808acfb17..228fd4dd32f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } +static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev) +{ + return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst); +} + static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_idx, struct amdgpu_ring *ring) { @@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + if (aqua_vanjaram_xcp_vcn_shared(adev)) inst_mask = 1 << (inst_idx * 2); break; default: @@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update( aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); - /* VCN is shared by two partitions under CPX MODE */ + /* VCN may be shared by two partitions under CPX MODE in certain + * configs. 
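 * Sharing is keyed off the helper added above, which simply compares the
 * partition and instance counts:
 *
 *   aqua_vanjaram_xcp_vcn_shared(adev)
 *       == (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst)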
+ */ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && - adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + aqua_vanjaram_xcp_vcn_shared(adev)) aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); } @@ -297,13 +304,56 @@ u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id) return ext_offset; } +static enum amdgpu_gfx_partition +__aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc, num_xcc_per_xcp = 0, mode = 0; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + if (adev->gfx.funcs->get_xccs_per_xcp) + num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); + if ((num_xcc_per_xcp) && (num_xcc % num_xcc_per_xcp == 0)) + mode = num_xcc / num_xcc_per_xcp; + + if (num_xcc_per_xcp == 1) + return AMDGPU_CPX_PARTITION_MODE; + + switch (mode) { + case 1: + return AMDGPU_SPX_PARTITION_MODE; + case 2: + return AMDGPU_DPX_PARTITION_MODE; + case 3: + return AMDGPU_TPX_PARTITION_MODE; + case 4: + return AMDGPU_QPX_PARTITION_MODE; + default: + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + } + + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; +} + static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) { - enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + enum amdgpu_gfx_partition derv_mode, + mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; struct amdgpu_device *adev = xcp_mgr->adev; - if (adev->nbio.funcs->get_compute_partition_mode) + derv_mode = __aqua_vanjaram_calc_xcp_mode(xcp_mgr); + + if (amdgpu_sriov_vf(adev)) + return derv_mode; + + if (adev->nbio.funcs->get_compute_partition_mode) { mode = adev->nbio.funcs->get_compute_partition_mode(adev); + if (mode != derv_mode) + dev_warn( + adev->dev, + "Mismatch in compute partition mode - reported : %d derived : %d", + mode, derv_mode); + } return mode; } @@ -415,7 +465,7 @@ __aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr) if (adev->gmc.num_mem_partitions == num_xcc / 2) return (adev->flags & AMD_IS_APU) ? 
AMDGPU_TPX_PARTITION_MODE : - AMDGPU_QPX_PARTITION_MODE; + AMDGPU_CPX_PARTITION_MODE; if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU)) return AMDGPU_DPX_PARTITION_MODE; @@ -494,6 +544,12 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) { mode = __aqua_vanjaram_get_auto_mode(xcp_mgr); + if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) { + dev_err(adev->dev, + "Invalid config, no compatible compute partition mode found, available memory partitions: %d", + adev->gmc.num_mem_partitions); + return -EINVAL; + } } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) { dev_err(adev->dev, "Invalid compute partition mode requested, requested: %s, available memory partitions: %d", @@ -611,6 +667,9 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) { int ret; + if (amdgpu_sriov_vf(adev)) + aqua_vanjaram_xcp_funcs.switch_partition_mode = NULL; + ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1, &aqua_vanjaram_xcp_funcs); if (ret) @@ -623,7 +682,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) { - u32 mask, inst_mask = adev->sdma.sdma_mask; + u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask; int ret, i; /* generally 1 AID supports 4 instances */ @@ -635,7 +694,9 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask; inst_mask >>= adev->sdma.num_inst_per_aid, ++i) { - if ((inst_mask & mask) == mask) + avail_inst = inst_mask & mask; + if (avail_inst == mask || avail_inst == 0x3 || + avail_inst == 0xc) adev->aid_mask |= (1 << i); } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 72362df352f6..09715b506468 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -301,7 +301,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, (*ptr) += 4; if (print) DEBUG("IMM 0x%08X\n", val); - return val; + break; case ATOM_SRC_WORD0: case ATOM_SRC_WORD8: case ATOM_SRC_WORD16: @@ -309,7 +309,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, (*ptr) += 2; if (print) DEBUG("IMM 0x%04X\n", val); - return val; + break; case ATOM_SRC_BYTE0: case ATOM_SRC_BYTE8: case ATOM_SRC_BYTE16: @@ -318,9 +318,9 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, (*ptr)++; if (print) DEBUG("IMM 0x%02X\n", val); - return val; + break; } - break; + return val; case ATOM_ARG_PLL: idx = U8(*ptr); (*ptr)++; @@ -1243,6 +1243,7 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.ps_size = params_size; ectx.abort = false; ectx.last_jump = 0; + ectx.last_jump_jiffies = 0; if (ws) { ectx.ws = kcalloc(4, ws, GFP_KERNEL); ectx.ws_size = ws; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index a3a643254d7a..cf1d5d462b67 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1375,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool cik_asic_supports_baco(struct amdgpu_device *adev) +static int cik_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } @@ -2210,6 +2210,8 @@ static const struct amd_ip_funcs 
cik_common_ip_funcs = { .soft_reset = cik_common_soft_reset, .set_clockgating_state = cik_common_set_clockgating_state, .set_powergating_state = cik_common_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version cik_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index f24e34dc33d1..576baa9dbb0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -435,6 +435,8 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { .soft_reset = cik_ih_soft_reset, .set_clockgating_state = cik_ih_set_clockgating_state, .set_powergating_state = cik_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs cik_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index a3fccc4c1f43..952737de9411 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -107,7 +107,6 @@ static void cik_sdma_free_microcode(struct amdgpu_device *adev) static int cik_sdma_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err = 0, i; DRM_DEBUG("\n"); @@ -133,16 +132,18 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + "amdgpu/%s_sdma.bin", chip_name); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; } out: if (err) { - pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name); + pr_err("cik_sdma: Failed to load firmware \"%s_sdma%s.bin\"\n", + chip_name, i == 0 ? "" : "1"); for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ucode_release(&adev->sdma.instance[i].fw); } @@ -1228,6 +1229,8 @@ static const struct amd_ip_funcs cik_sdma_ip_funcs = { .soft_reset = cik_sdma_soft_reset, .set_clockgating_state = cik_sdma_set_clockgating_state, .set_powergating_state = cik_sdma_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { @@ -1290,7 +1293,7 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: is this a secure operation + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (CIK). * Used by the amdgpu ttm implementation to move pages if @@ -1300,7 +1303,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = byte_count; diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h new file mode 100644 index 000000000000..2f6c9d11d5ae --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h @@ -0,0 +1,121 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __CLEARSTATE_GFX12_H_ +#define __CLEARSTATE_GFX12_H_ + +static const unsigned int gfx12_SECT_CONTEXT_def_1[] = { +0x00000000, //mmSC_MEM_TEMPORAL +0x00000000, //mmSC_MEM_SPEC_READ +0x00000000, //mmPA_SC_VPORT_0_TL +0x00000000, //mmPA_SC_VPORT_0_BR +0x00000000, //mmPA_SC_VPORT_1_TL +0x00000000, //mmPA_SC_VPORT_1_BR +0x00000000, //mmPA_SC_VPORT_2_TL +0x00000000, //mmPA_SC_VPORT_2_BR +0x00000000, //mmPA_SC_VPORT_3_TL +0x00000000, //mmPA_SC_VPORT_3_BR +0x00000000, //mmPA_SC_VPORT_4_TL +0x00000000, //mmPA_SC_VPORT_4_BR +0x00000000, //mmPA_SC_VPORT_5_TL +0x00000000, //mmPA_SC_VPORT_5_BR +0x00000000, //mmPA_SC_VPORT_6_TL +0x00000000, //mmPA_SC_VPORT_6_BR +0x00000000, //mmPA_SC_VPORT_7_TL +0x00000000, //mmPA_SC_VPORT_7_BR +0x00000000, //mmPA_SC_VPORT_8_TL +0x00000000, //mmPA_SC_VPORT_8_BR +0x00000000, //mmPA_SC_VPORT_9_TL +0x00000000, //mmPA_SC_VPORT_9_BR +0x00000000, //mmPA_SC_VPORT_10_TL +0x00000000, //mmPA_SC_VPORT_10_BR +0x00000000, //mmPA_SC_VPORT_11_TL +0x00000000, //mmPA_SC_VPORT_11_BR +0x00000000, //mmPA_SC_VPORT_12_TL +0x00000000, //mmPA_SC_VPORT_12_BR +0x00000000, //mmPA_SC_VPORT_13_TL +0x00000000, //mmPA_SC_VPORT_13_BR +0x00000000, //mmPA_SC_VPORT_14_TL +0x00000000, //mmPA_SC_VPORT_14_BR +0x00000000, //mmPA_SC_VPORT_15_TL +0x00000000, //mmPA_SC_VPORT_15_BR +}; + +static const unsigned int gfx12_SECT_CONTEXT_def_2[] = { +0x00000000, //mmPA_CL_PROG_NEAR_CLIP_Z +0x00000000, //mmPA_RATE_CNTL +}; + +static const unsigned int gfx12_SECT_CONTEXT_def_3[] = { +0x00000000, //mmCP_PERFMON_CNTX_CNTL +}; + +static const unsigned int gfx12_SECT_CONTEXT_def_4[] = { +0x00000000, //mmCONTEXT_RESERVED_REG0 +0x00000000, //mmCONTEXT_RESERVED_REG1 +0x00000000, //mmPA_SC_CLIPRECT_0_EXT +0x00000000, //mmPA_SC_CLIPRECT_1_EXT +0x00000000, //mmPA_SC_CLIPRECT_2_EXT +0x00000000, //mmPA_SC_CLIPRECT_3_EXT +}; + +static const unsigned int gfx12_SECT_CONTEXT_def_5[] = { +0x00000000, //mmPA_SC_HIZ_INFO +0x00000000, //mmPA_SC_HIS_INFO +0x00000000, //mmPA_SC_HIZ_BASE +0x00000000, //mmPA_SC_HIZ_BASE_EXT +0x00000000, //mmPA_SC_HIZ_SIZE_XY +0x00000000, //mmPA_SC_HIS_BASE +0x00000000, //mmPA_SC_HIS_BASE_EXT +0x00000000, //mmPA_SC_HIS_SIZE_XY +0x00000000, //mmPA_SC_BINNER_OUTPUT_TIMEOUT_CNTL +0x00000000, //mmPA_SC_BINNER_DYNAMIC_BATCH_LIMIT +0x00000000, //mmPA_SC_HISZ_CONTROL +}; + +static const unsigned int gfx12_SECT_CONTEXT_def_6[] = { +0x00000000, //mmCB_MEM0_INFO +0x00000000, //mmCB_MEM1_INFO +0x00000000, //mmCB_MEM2_INFO 
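/*
 * These zero-filled SECT_CONTEXT tables are the clear-state defaults the CP
 * replays; the cs_extent_def entries further down bind each table to its
 * register range, e.g. this one (gfx12_SECT_CONTEXT_def_6) covers the eight
 * CB_MEMn_INFO registers starting at offset 0x0000a3c0.
 */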
+0x00000000, //mmCB_MEM3_INFO +0x00000000, //mmCB_MEM4_INFO +0x00000000, //mmCB_MEM5_INFO +0x00000000, //mmCB_MEM6_INFO +0x00000000, //mmCB_MEM7_INFO +}; + +static const struct cs_extent_def gfx12_SECT_CONTEXT_defs[] = { + {gfx12_SECT_CONTEXT_def_1, 0x0000a03e, 34 }, + {gfx12_SECT_CONTEXT_def_2, 0x0000a0cc, 2 }, + {gfx12_SECT_CONTEXT_def_3, 0x0000a0d8, 1 }, + {gfx12_SECT_CONTEXT_def_4, 0x0000a0db, 6 }, + {gfx12_SECT_CONTEXT_def_5, 0x0000a2e5, 11 }, + {gfx12_SECT_CONTEXT_def_6, 0x0000a3c0, 8 }, + { 0, 0, 0 } +}; + +static const struct cs_section_def gfx12_cs_data[] = { + { gfx12_SECT_CONTEXT_defs, SECT_CONTEXT }, + { 0, SECT_NONE } +}; + +#endif /* __CLEARSTATE_GFX12_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index c19681492efa..072643787384 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -433,6 +433,8 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { .soft_reset = cz_ih_soft_reset, .set_clockgating_state = cz_ih_set_clockgating_state, .set_powergating_state = cz_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs cz_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 221af054d874..dddb5fe16f2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1299,7 +1299,7 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder return; } - sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); + sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb); if (sad_count < 0) { DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); sad_count = 0; @@ -1369,7 +1369,7 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder) return; } - sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); + sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads); if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); if (sad_count <= 0) @@ -3333,6 +3333,8 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = { .soft_reset = dce_v10_0_soft_reset, .set_clockgating_state = dce_v10_0_set_clockgating_state, .set_powergating_state = dce_v10_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 69e8b0db6cf7..11780e4d7e9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1331,7 +1331,7 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder return; } - sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); + sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb); if (sad_count < 0) { DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); sad_count = 0; @@ -1401,7 +1401,7 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) return; } - sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); + sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads); if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); if (sad_count <= 0) @@ -3464,6 +3464,8 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .soft_reset = dce_v11_0_soft_reset, .set_clockgating_state = dce_v11_0_set_clockgating_state, 
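/*
 * dump_ip_state and print_ip_state are newly added amd_ip_funcs hooks; IP
 * blocks with no register-dump support stub them out as NULL, as this and
 * the neighbouring DCE/IH/SDMA hunks do. A block implementing them would
 * plausibly wire up its own handlers (the names below are hypothetical):
 *
 *   .dump_ip_state  = dce_v11_0_dump_ip_state,
 *   .print_ip_state = dce_v11_0_print_ip_state,
 */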
.set_powergating_state = dce_v11_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 60d40201fdd1..05c0df97f01d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1217,7 +1217,7 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) return; } - sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); + sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb); if (sad_count < 0) { DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); sad_count = 0; @@ -1292,7 +1292,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) return; } - sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); + sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads); if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); if (sad_count <= 0) @@ -3154,6 +3154,8 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .soft_reset = dce_v6_0_soft_reset, .set_clockgating_state = dce_v6_0_set_clockgating_state, .set_powergating_state = dce_v6_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 5a5fcc45e452..dc73e301d937 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1272,7 +1272,7 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder) return; } - sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); + sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb); if (sad_count < 0) { DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); sad_count = 0; @@ -1340,7 +1340,7 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) return; } - sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); + sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads); if (sad_count < 0) DRM_ERROR("Couldn't read SADs: %d\n", sad_count); if (sad_count <= 0) @@ -3242,6 +3242,8 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = { .soft_reset = dce_v8_0_soft_reset, .set_clockgating_state = dce_v8_0_set_clockgating_state, .set_powergating_state = dce_v8_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 5dfab80ffff2..cd298556f7a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -70,6 +70,8 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) int fb_channel_number; fb_channel_number = adev->df.funcs->get_fb_channel_number(adev); + if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number)) + fb_channel_number = 0; return df_v1_7_channel_number[fb_channel_number]; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c new file mode 100644 index 000000000000..2a573e33908b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_15.h" + +#include "df/df_4_15_offset.h" +#include "df/df_4_15_sh_mask.h" + +static void df_v4_15_hw_init(struct amdgpu_device *adev) +{ + if (adev->have_atomics_support) { + uint32_t tmp; + uint32_t dis_lcl_proc = (1 << 1 | + 1 << 2 | + 1 << 13); + + tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); + tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT); + WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp); + } +} + +const struct amdgpu_df_funcs df_v4_15_funcs = { + .hw_init = df_v4_15_hw_init +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h new file mode 100644 index 000000000000..dddf2422112a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h @@ -0,0 +1,30 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __DF_V4_15_H__ +#define __DF_V4_15_H__ + +extern const struct amdgpu_df_funcs df_v4_15_funcs; + +#endif /* __DF_V4_15_H__ */ + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f90905ef32c7..2957702fca0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -276,6 +276,181 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); +static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, 
mmGCVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4), + SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = { + /* compute registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) +}; + +static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { + /* gfx queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI) +}; + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), @@ -3558,14 +3733,8 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq) { - struct amdgpu_device *adev = kiq_ring->adev; uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; - if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { - amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); - return; - } - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ PACKET3_UNMAP_QUEUES_ACTION(action) | @@ -3823,33 +3992,18 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -3876,12 +4030,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -3964,7 +4116,6 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[40]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -3979,27 +4130,27 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) wks = "_wks"; amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = request_firmware(&adev->gfx.rlc_fw, fw_name, 
adev->dev); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -4014,15 +4165,15 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) goto out; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); @@ -4490,6 +4641,47 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + uint32_t *ptr; + uint32_t inst; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_10); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } + + /* Allocate memory for gfx queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10); + inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); + adev->gfx.ip_dump_gfx_queues = NULL; + } else { + adev->gfx.ip_dump_gfx_queues = ptr; + } +} + static int gfx_v10_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; @@ -4518,7 +4710,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -4615,18 +4807,16 @@ static int gfx_v10_0_sw_init(void *handle) } } - if (!adev->enable_mes_kiq) { - r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } - - r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); - if (r) - return r; + r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; } + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); + if (r) + return r; + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0); if (r) return r; @@ -4642,6 +4832,8 @@ static int gfx_v10_0_sw_init(void *handle) gfx_v10_0_gpu_early_init(adev); + gfx_v10_0_alloc_ip_dump(adev); + return 0; } 
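[Note on the gfx_v10_0_alloc_ip_dump() hunk above: each register list gets one flat buffer, reg_count words for the core list and reg_count words per (mec, pipe, queue) or (me, pipe, queue) instance for the queue lists. The dump and print paths later walk the same nested loops with a running index += reg_count, so correctness rests on both sides iterating instances in the same order. A small sketch of that indexing contract, with hypothetical counts standing in for the adev->gfx.mec fields:

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical geometry standing in for adev->gfx.mec.* */
#define NUM_MEC            2
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8
#define REG_COUNT          40	/* stand-in for ARRAY_SIZE(gc_cp_reg_list_10) */

/* Start of the reg_count-sized slice for one (mec, pipe, queue) instance;
 * the dump/print loops in the patch reproduce this implicitly by doing
 * index += reg_count in the same mec -> pipe -> queue order. */
static size_t dump_slice_offset(int mec, int pipe, int queue)
{
	return ((size_t)(mec * NUM_PIPE_PER_MEC + pipe) * NUM_QUEUE_PER_PIPE
		+ queue) * REG_COUNT;
}

int main(void)
{
	uint32_t *buf = calloc((size_t)REG_COUNT * NUM_MEC *
			       NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE,
			       sizeof(*buf));
	if (!buf)
		return 1;	/* driver tolerates this: dump paths bail on NULL */

	/* buf[dump_slice_offset(m, p, q) + reg] = value of register 'reg'
	 * for queue (m, p, q) */
	free(buf);
	return 0;
}

Allocation failure is deliberately non-fatal in the driver: the pointer stays NULL and the dump/print callbacks return early, so a devcoredump merely shrinks instead of sw_init failing.]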
@@ -4678,10 +4870,8 @@ static int gfx_v10_0_sw_fini(void *handle) amdgpu_gfx_mqd_sw_fini(adev, 0); - if (!adev->enable_mes_kiq) { - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); - amdgpu_gfx_kiq_fini(adev, 0); - } + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); gfx_v10_0_pfp_fini(adev); gfx_v10_0_ce_fini(adev); @@ -4694,6 +4884,10 @@ static int gfx_v10_0_sw_fini(void *handle) gfx_v10_0_free_microcode(adev); + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); + kfree(adev->gfx.ip_dump_gfx_queues); + return 0; } @@ -6881,10 +7075,7 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) return r; } - if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) - r = amdgpu_mes_kiq_hw_init(adev); - else - r = gfx_v10_0_kiq_resume(adev); + r = gfx_v10_0_kiq_resume(adev); if (r) return r; @@ -7133,11 +7324,9 @@ static int gfx_v10_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_pm_load_smu_firmware(adev, NULL); - if (r) - return r; - } + r = amdgpu_pm_load_smu_firmware(adev, NULL); + if (r) + return r; gfx_v10_0_disable_gpa_mode(adev); } @@ -7939,15 +8128,24 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid) { - u32 data; + u32 reg, pre_data, data; + reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); /* not for *_SOC15 */ - data = RREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL); + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) + pre_data = RREG32_NO_KIQ(reg); + else + pre_data = RREG32(reg); - data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); + if (pre_data != data) { + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { + WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); + } else + WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); + } } static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) @@ -8196,45 +8394,17 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + 
WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -8259,42 +8429,13 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx10 now */ - } + BUG(); /* only DOORBELL method supported on gfx10 now */ } } @@ -8317,7 +8458,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } reg_mem_engine = 0; } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; reg_mem_engine = 1; /* pfp */ } @@ -8353,10 +8494,6 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -8376,10 +8513,6 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned int vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -8437,8 +8570,7 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? 
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -8466,10 +8598,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -8620,19 +8749,9 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v10_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -8658,28 +8777,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -8931,49 +9035,34 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev, int i; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); - } else { - me_id = 
(entry->ring_id & 0x0c) >> 2; - pipe_id = (entry->ring_id & 0x03) >> 0; - queue_id = (entry->ring_id & 0x70) >> 4; + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - if (pipe_id == 0) - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); - else - amdgpu_fence_process(&adev->gfx.gfx_ring[1]); - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - /* Per-queue interrupt is supported for MEC starting from VI. - * The interrupt can only be enabled/disabled per pipe instead - * of per queue. - */ - if ((ring->me == me_id) && - (ring->pipe == pipe_id) && - (ring->queue == queue_id)) - amdgpu_fence_process(ring); - } - break; + switch (me_id) { + case 0: + if (pipe_id == 0) + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + else + amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead + * of per queue. + */ + if ((ring->me == me_id) && + (ring->pipe == pipe_id) && + (ring->queue == queue_id)) + amdgpu_fence_process(ring); } + break; } return 0; @@ -9154,6 +9243,137 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + + if (!adev->gfx.ip_dump_core) + return; + + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_10_1[i].reg_name, + adev->gfx.ip_dump_core[i]); + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_10); + drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); + } + index += reg_count; + } + } + } + + /* print gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10); + drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", + adev->gfx.me.num_me, + adev->gfx.me.num_pipe_per_me, + adev->gfx.me.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_gfx_queue_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_gfx_queues[index + reg]); + } + index += reg_count; + } + } + } +} + +static void gfx_v10_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = 
ARRAY_SIZE(gc_reg_list_10_1); + + if (!adev->gfx.ip_dump_core) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i])); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_10); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_10[reg])); + } + index += reg_count; + } + } + } + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + nv_grbm_select(adev, i, j, k, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_gfx_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_gfx_queue_reg_list_10[reg])); + } + index += reg_count; + } + } + } + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); +} + static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .name = "gfx_v10_0", .early_init = gfx_v10_0_early_init, @@ -9170,6 +9390,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .set_clockgating_state = gfx_v10_0_set_clockgating_state, .set_powergating_state = gfx_v10_0_set_powergating_state, .get_clockgating_state = gfx_v10_0_get_clockgating_state, + .dump_ip_state = gfx_v10_ip_dump, + .print_ip_state = gfx_v10_ip_print, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { @@ -9186,7 +9408,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -9276,7 +9498,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v10_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ .emit_ib = gfx_v10_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 1770e496c1b7..dcef39907449 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -93,6 +93,154 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 
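[Note before the gfx11 counterpart below: the new .dump_ip_state/.print_ip_state pair hooked into gfx_v10_0_ip_funcs above splits the work into a capture phase that touches hardware (GFXOFF disabled, srbm_mutex held around the nv_grbm_select() instance switches) and a print phase that only formats the captured words through a drm_printer, so printing from the devcoredump read path needs no register access. A standalone sketch of that two-phase shape, with printf standing in for drm_printf() and a fake callback standing in for RREG32():

#include <stdint.h>
#include <stdio.h>

#define N_REGS 3
static const char * const reg_names[N_REGS] = {
	"GRBM_STATUS", "GRBM_STATUS2", "CP_STALLED_STAT1"	/* stand-ins */
};
static uint32_t snapshot[N_REGS];

/* phase 1: touch hardware once, under whatever locking it needs */
static void ip_dump(uint32_t (*read_reg)(int idx))
{
	for (int i = 0; i < N_REGS; i++)
		snapshot[i] = read_reg(i);
}

/* phase 2: pure formatting, no hardware access; format string mirrors
 * the "%-50s \t 0x%08x\n" used by gfx_v10_ip_print() above */
static void ip_print(FILE *p)
{
	for (int i = 0; i < N_REGS; i++)
		fprintf(p, "%-50s \t 0x%08x\n", reg_names[i],
			(unsigned int)snapshot[i]);
}

static uint32_t fake_read(int idx)
{
	return 0xCAFE0000u | (uint32_t)idx;	/* dummy register values */
}

int main(void)
{
	ip_dump(fake_read);
	ip_print(stdout);
	return 0;
}

The gfx11 hunks that follow (gc_reg_list_11_0 and the gfx_v11_ip_dump/print pair) duplicate the same structure with soc21_grbm_select() in place of nv_grbm_select().]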
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); + +static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), + SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4), + 
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { + /* compute registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) +}; + +static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { + /* gfx queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, 
regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) +}; static const struct soc15_reg_golden golden_settings_gc_11_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) @@ -467,10 +615,9 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char * { const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; - char fw_name[40]; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + "amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -509,8 +656,7 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[40]; - char ucode_prefix[30]; + char ucode_prefix[25]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -519,9 +665,8 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ @@ -537,8 +682,8 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; if (adev->gfx.rs64_enable) { @@ -552,10 +697,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && adev->pdev->revision == 0xCE) - snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin"); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/gc_11_0_0_rlc_1.bin"); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; @@ -566,8 +712,8 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) goto out; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; if (adev->gfx.rs64_enable) { @@ -912,6 +1058,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -929,9 +1076,9 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) static int gfx_v11_0_gfx_ring_init(struct 
amdgpu_device *adev, int ring_id, int me, int pipe, int queue) { - int r; struct amdgpu_ring *ring; unsigned int irq_type; + unsigned int hw_prio; ring = &adev->gfx.gfx_ring[ring_id]; @@ -950,11 +1097,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; - r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) - return r; - return 0; + hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); } static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, @@ -1331,6 +1477,47 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) return 0; } +static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); + uint32_t *ptr; + uint32_t inst; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_11); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } + + /* Allocate memory for gfx queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); + inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); + adev->gfx.ip_dump_gfx_queues = NULL; + } else { + adev->gfx.ip_dump_gfx_queues = ptr; + } +} + static int gfx_v11_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; @@ -1352,6 +1539,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1485,6 +1673,8 @@ static int gfx_v11_0_sw_init(void *handle) return -EINVAL; } + gfx_v11_0_alloc_ip_dump(adev); + return 0; } @@ -1544,6 +1734,10 @@ static int gfx_v11_0_sw_fini(void *handle) gfx_v11_0_free_microcode(adev); + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); + kfree(adev->gfx.ip_dump_gfx_queues); + return 0; } @@ -1635,7 +1829,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -2594,7 +2788,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 4) || amdgpu_ip_version(adev, GC_HWIP, 0) == 
IP_VERSION(11, 5, 0) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -3615,6 +3810,24 @@ static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) (adev->doorbell_index.userqueue_end * 2) << 2); } +static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, + struct v11_gfx_mqd *mqd, + struct amdgpu_mqd_prop *prop) +{ + bool priority = 0; + u32 tmp; + + /* set up default queue priority level + * 0x0 = low priority, 0x1 = high priority + */ + if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) + priority = 1; + + tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); + mqd->cp_gfx_hqd_queue_priority = tmp; +} + static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, struct amdgpu_mqd_prop *prop) { @@ -3643,11 +3856,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; - /* set up default queue priority level - * 0x0 = low priority, 0x1 = high priority */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); - tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); - mqd->cp_gfx_hqd_queue_priority = tmp; + /* set up gfx queue priority */ + gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); /* set up time quantum */ tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); @@ -4210,7 +4420,9 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) false : true; adev->gfxhub.funcs->set_fault_enable_default(adev, value); - amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); + /* TODO investigate why this and the hdp flush above is needed, + * are we missing a flush somewhere else? */ + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); return 0; } @@ -4293,11 +4505,11 @@ static int gfx_v11_0_hw_init(void *handle) /* RLC autoload sequence 1: Program rlc ram */ if (adev->gfx.imu.funcs->program_rlc_ram) adev->gfx.imu.funcs->program_rlc_ram(adev); + /* rlc autoload firmware */ + r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); + if (r) + return r; } - /* rlc autoload firmware */ - r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); - if (r) - return r; } else { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { @@ -4346,11 +4558,9 @@ static int gfx_v11_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. 
*/ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_pm_load_smu_firmware(adev, NULL); - if (r) - return r; - } + r = amdgpu_pm_load_smu_firmware(adev, NULL); + if (r) + return r; } gfx_v11_0_constants_init(adev); @@ -4506,14 +4716,11 @@ static int gfx_v11_0_soft_reset(void *handle) gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); @@ -4523,16 +4730,14 @@ static int gfx_v11_0_soft_reset(void *handle) for (i = 0; i < adev->gfx.me.num_me; ++i) { for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); } } } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ r = gfx_v11_0_request_gfx_index_mutex(adev, 1); @@ -5035,24 +5240,31 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { - u32 data; + u32 reg, pre_data, data; amdgpu_gfx_off_ctrl(adev, false); + reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) + pre_data = RREG32_NO_KIQ(reg); + else + pre_data = RREG32(reg); - data = RREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL); - - data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); - + if (pre_data != data) { + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); + } else + WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + } amdgpu_gfx_off_ctrl(adev, true); if (ring && amdgpu_sriov_is_pp_one_vf(adev) + && (pre_data != data) && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { - uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); amdgpu_ring_emit_wreg(ring, reg, data); } } @@ -5089,6 +5301,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5125,6 +5338,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5157,6 +5371,7 @@ static 
int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5298,7 +5513,7 @@ static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } reg_mem_engine = 0; } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; reg_mem_engine = 1; /* pfp */ } @@ -5397,11 +5612,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | PACKET3_RELEASE_MEM_GCR_GL2_WB | - PACKET3_RELEASE_MEM_GCR_GL2_INV | - PACKET3_RELEASE_MEM_GCR_GL2_US | - PACKET3_RELEASE_MEM_GCR_GL1_INV | - PACKET3_RELEASE_MEM_GCR_GLV_INV | - PACKET3_RELEASE_MEM_GCR_GLM_INV | + PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ PACKET3_RELEASE_MEM_GCR_GLM_WB | PACKET3_RELEASE_MEM_CACHE_POLICY(3) | PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | @@ -5465,6 +5676,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* Make sure that we can't skip the SET_Q_MODE packets when the VM * changed in any way. */ + ring->set_q_mode_offs = 0; ring->set_q_mode_ptr = NULL; } @@ -6155,6 +6367,136 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v11_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); + + if (!adev->gfx.ip_dump_core) + return; + + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_11_0[i].reg_name, + adev->gfx.ip_dump_core[i]); + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_11); + drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); + } + index += reg_count; + } + } + } + + /* print gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); + drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", + adev->gfx.me.num_me, + adev->gfx.me.num_pipe_per_me, + adev->gfx.me.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_gfx_queue_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_gfx_queues[index + reg]); + } + index += reg_count; + } + } + } +} + +static void gfx_v11_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t 
reg_count = ARRAY_SIZE(gc_reg_list_11_0); + + if (!adev->gfx.ip_dump_core) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_11); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_11[reg])); + } + index += reg_count; + } + } + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + soc21_grbm_select(adev, i, j, k, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_gfx_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_gfx_queue_reg_list_11[reg])); + } + index += reg_count; + } + } + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); +} + static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .name = "gfx_v11_0", .early_init = gfx_v11_0_early_init, @@ -6173,6 +6515,8 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, + .dump_ip_state = gfx_v11_ip_dump, + .print_ip_state = gfx_v11_ip_print, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { @@ -6191,7 +6535,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 5 + /* COND_EXEC */ @@ -6277,7 +6621,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v11_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 9e7ce1e6bc06..9cd221ed240c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -85,6 +85,7 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) && (entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) && !entry->vmid && !entry->pasid) { + struct amdgpu_ras *con = 
amdgpu_ras_get_context(adev); uint32_t rlc_status0 = 0; rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0); @@ -96,7 +97,8 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; } - amdgpu_ras_reset_gpu(adev); + if (con && !con->is_rma) + amdgpu_ras_reset_gpu(adev); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c new file mode 100644 index 000000000000..f384be0d1800 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -0,0 +1,5299 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "amdgpu_atomfirmware.h" +#include "imu_v12_0.h" +#include "soc24.h" +#include "nvd.h" + +#include "gc/gc_12_0_0_offset.h" +#include "gc/gc_12_0_0_sh_mask.h" +#include "soc24_enum.h" +#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" + +#include "soc15.h" +#include "soc15d.h" +#include "clearstate_gfx12.h" +#include "v12_structs.h" +#include "gfx_v12_0.h" +#include "nbif_v6_3_1.h" +#include "mes_v12_0.h" + +#define GFX12_NUM_GFX_RINGS 1 +#define GFX12_MEC_HPD_SIZE 2048 + +#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L + +MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin"); + +static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + 
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), + SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR), + + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = { + /* compute registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, 
regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) +}; + +static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { + /* gfx queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) +}; + +#define DEFAULT_SH_MEM_CONFIG \ + ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) + +static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev); +static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev); +static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev); +static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev); +static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info 
*cu_info); +static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev); +static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance, int xcc_id); +static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); + +static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); +static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val); +static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); +static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint8_t dst_sel); +static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev, + bool enable); + +static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); +} + +static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = ring->wptr_gpu_addr; + uint32_t me = 0, eng_sel = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_COMPUTE: + me = 1; + eng_sel = 0; + break; + case AMDGPU_RING_TYPE_GFX: + me = 0; + eng_sel = 4; + break; + case AMDGPU_RING_TYPE_MES: + me = 2; + eng_sel = 5; + break; + default: + WARN_ON(1); + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((me)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; + + if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { + amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); + return; + } + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, + uint32_t flush_type, + bool all_hub) +{ + gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); +} + +static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v12_0_kiq_set_resources, + .kiq_map_queues = gfx_v12_0_kiq_map_queues, + .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues, + .kiq_query_status = gfx_v12_0_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 2, +}; + +static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs; +} + +static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, + uint32_t mask, uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(scratch, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 5); + if (r) { + dev_err(adev->dev, + "amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + 
return r; + } + + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { + gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF); + } else { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, scratch - + PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, 0xDEADBEEF); + } + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; +} + +static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + uint64_t gpu_addr; + volatile uint32_t *cpu_ptr; + long r; + + /* the MES KIQ firmware doesn't support indirect buffers yet */ + if (adev->enable_mes_kiq && + ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + return 0; + + memset(&ib, 0, sizeof(ib)); + + if (ring->is_mes_queue) { + uint32_t padding, offset; + + offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); + padding = amdgpu_mes_ctx_get_offs(ring, + AMDGPU_MES_CTX_PADDING_OFFS); + + ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + + gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); + cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); + *cpu_ptr = cpu_to_le32(0xCAFEDEAD); + } else { + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; + + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; + } + } + + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + goto err2; + } + + if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; +err2: + if (!ring->is_mes_queue) + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err1: + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); + return r; +} + +static void gfx_v12_0_free_microcode(struct amdgpu_device *adev) +{ + amdgpu_ucode_release(&adev->gfx.pfp_fw); + amdgpu_ucode_release(&adev->gfx.me_fw); + amdgpu_ucode_release(&adev->gfx.rlc_fw); + amdgpu_ucode_release(&adev->gfx.mec_fw); + + kfree(adev->gfx.rlc.register_list_format); +} + +static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) +{ + const struct psp_firmware_header_v1_0 *toc_hdr; + int err = 0; + + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + "amdgpu/%s_toc.bin", ucode_prefix); + if (err) + goto out; + + toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; + adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); + adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); + adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); + adev->psp.toc.start_addr = (uint8_t *)toc_hdr + + le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); + return 0; +out: +
amdgpu_ucode_release(&adev->psp.toc_fw); + return err; +} + +static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) +{ + char ucode_prefix[15]; + int err; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + uint16_t version_major; + uint16_t version_minor; + + DRM_DEBUG("\n"); + + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp.bin", ucode_prefix); + if (err) + goto out; + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); + + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me.bin", ucode_prefix); + if (err) + goto out; + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); + + if (!amdgpu_sriov_vf(adev)) { + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", ucode_prefix); + if (err) + goto out; + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); + if (err) + goto out; + } + + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", ucode_prefix); + if (err) + goto out; + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix); + + /* only one MEC for gfx 12 */ + adev->gfx.mec2_fw = NULL; + + if (adev->gfx.imu.funcs) { + if (adev->gfx.imu.funcs->init_microcode) { + err = adev->gfx.imu.funcs->init_microcode(adev); + if (err) + dev_err(adev->dev, "Failed to load imu firmware!\n"); + } + } + +out: + if (err) { + amdgpu_ucode_release(&adev->gfx.pfp_fw); + amdgpu_ucode_release(&adev->gfx.me_fw); + amdgpu_ucode_release(&adev->gfx.rlc_fw); + amdgpu_ucode_release(&adev->gfx.mec_fw); + } + + return err; +} + +static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + count += 1; + + for (sect = gfx12_cs_data; sect->section != NULL; ++sect) { + if (sect->id == SECT_CONTEXT) { + for (ext = sect->section; ext->extent != NULL; ++ext) + count += 2 + ext->reg_count; + } else + return 0; + } + + return count; +} + +static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, clustercount = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + count += 1; + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + if (sect->id == SECT_CONTEXT) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + clustercount++; + buffer[count++] = ext->reg_count; + buffer[count++] = ext->reg_index; + + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } + } else + return; + } + + buffer[0] = clustercount; +} + +static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev) +{ + /* clear state block */ + 
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} + +static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) +{ + struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; + + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; + reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); + reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); + reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); + reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3); + reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL); + reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX); + reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0); + adev->gfx.rlc.rlcg_reg_access_supported = true; +} + +static int gfx_v12_0_rlc_init(struct amdgpu_device *adev) +{ + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = gfx12_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) + return r; + } + + /* init spm vmid with 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); + + return 0; +} + +static void gfx_v12_0_mec_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); +} + +static void gfx_v12_0_me_init(struct amdgpu_device *adev) +{ + bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); + + amdgpu_gfx_graphics_queue_acquire(adev); +} + +static int gfx_v12_0_mec_init(struct amdgpu_device *adev) +{ + int r; + u32 *hpd; + size_t mec_hpd_size; + + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE; + + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r); + gfx_v12_0_mec_fini(adev); + return r; + } + + memset(hpd, 0, mec_hpd_size); + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } + + return 0; +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) +{ + WREG32_SOC15(GC, 0, regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT)); + return RREG32_SOC15(GC, 0, regSQ_IND_DATA); +} + +static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, + uint32_t thread, uint32_t regno, + uint32_t num, uint32_t *out) +{ + WREG32_SOC15(GC, 0, regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); +} + +static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev, + uint32_t
xcc_id, + uint32_t simd, uint32_t wave, + uint32_t *dst, int *no_fields) +{ + /* in gfx12 the SIMD_ID is specified as part of the INSTANCE + * field when performing a select_se_sh so it should be + * zero here */ + WARN_ON(simd != 0); + + /* type 4 wave data */ + dst[(*no_fields)++] = 4; + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE); +} + +static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev, + uint32_t xcc_id, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + WARN_ON(simd != 0); + + wave_read_regs( + adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, + dst); +} + +static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev, + uint32_t xcc_id, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t start, uint32_t size, + uint32_t *dst) +{ + wave_read_regs( + adev, wave, thread, + start + SQIND_WAVE_VGPRS_OFFSET, size, dst); +} + +static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) +{ + soc24_grbm_select(adev, me, pipe, q, vm); +} + +static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v12_0_select_se_sh, + .read_wave_data = &gfx_v12_0_read_wave_data, + .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs, + .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs, + .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q, + .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk, +}; + +static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev) +{ + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + break; + default: + BUG(); + break; + } + + 
return 0; +} + +static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, + int me, int pipe, int queue) +{ + int r; + struct amdgpu_ring *ring; + unsigned int irq_type; + + ring = &adev->gfx.gfx_ring[ring_id]; + + ring->me = me; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + + if (!ring_id) + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; + ring->vm_hub = AMDGPU_GFXHUB(0); + sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + return 0; +} + +static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring; + unsigned int hw_prio; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX12_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB(0); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); + if (r) + return r; + + return 0; +} + +static struct { + SOC24_FIRMWARE_ID id; + unsigned int offset; + unsigned int size; + unsigned int size_x16; +} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX]; + +#define RLC_TOC_OFFSET_DWUNIT 8 +#define RLC_SIZE_MULTIPLE 1024 +#define RLC_TOC_UMF_SIZE_inM 23ULL +#define RLC_TOC_FORMAT_API 165ULL + +static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) +{ + RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc; + + while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) { + rlc_autoload_info[ucode->id].id = ucode->id; + rlc_autoload_info[ucode->id].offset = + ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4; + rlc_autoload_info[ucode->id].size = + ucode->size_x16 ? 
ucode->size * RLC_SIZE_MULTIPLE * 4 : + ucode->size * 4; + ucode++; + } +} + +static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev) +{ + uint32_t total_size = 0; + SOC24_FIRMWARE_ID id; + + gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); + + for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++) + total_size += rlc_autoload_info[id].size; + + /* In case the offset in rlc toc ucode is aligned */ + if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset) + total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset + + rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size; + if (total_size < (RLC_TOC_UMF_SIZE_inM << 20)) + total_size = RLC_TOC_UMF_SIZE_inM << 20; + + return total_size; +} + +static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) +{ + int r; + uint32_t total_size; + + total_size = gfx_v12_0_calc_toc_total_size(adev); + + r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); + + if (r) { + dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); + return r; + } + + return 0; +} + +static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, + SOC24_FIRMWARE_ID id, + const void *fw_data, + uint32_t fw_size) +{ + uint32_t toc_offset; + uint32_t toc_fw_size; + char *ptr = adev->gfx.rlc.rlc_autoload_ptr; + + if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX) + return; + + toc_offset = rlc_autoload_info[id].offset; + toc_fw_size = rlc_autoload_info[id].size; + + if (fw_size == 0) + fw_size = toc_fw_size; + + if (fw_size > toc_fw_size) + fw_size = toc_fw_size; + + memcpy(ptr + toc_offset, fw_data, fw_size); + + if (fw_size < toc_fw_size) + memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); +} + +static void +gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) +{ + void *data; + uint32_t size; + uint32_t *toc_ptr; + + data = adev->psp.toc.start_addr; + size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size; + + toc_ptr = (uint32_t *)data + size / 4 - 2; + *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1; + + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC, + data, size); +} + +static void +gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + uint32_t fw_size; + const struct gfx_firmware_header_v2_0 *cpv2_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + const struct rlc_firmware_header_v2_1 *rlcv21_hdr; + const struct rlc_firmware_header_v2_2 *rlcv22_hdr; + uint16_t version_major, version_minor; + + /* pfp ucode */ + cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + /* instruction */ + fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP, + fw_data, fw_size); + /* data */ + fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(cpv2_hdr->data_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK, + fw_data, fw_size); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK, + fw_data, fw_size); + /* me ucode */ + cpv2_hdr = (const struct 
gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + /* instruction */ + fw_data = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME, + fw_data, fw_size); + /* data */ + fw_data = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(cpv2_hdr->data_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK, + fw_data, fw_size); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK, + fw_data, fw_size); + /* mec ucode */ + cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + /* instruction */ + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC, + fw_data, fw_size); + /* data */ + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(cpv2_hdr->data_offset_bytes)); + fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK, + fw_data, fw_size); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK, + fw_data, fw_size); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK, + fw_data, fw_size); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK, + fw_data, fw_size); + + /* rlc ucode */ + rlc_hdr = (const struct rlc_firmware_header_v2_0 *) + adev->gfx.rlc_fw->data; + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE, + fw_data, fw_size); + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2) { + if (version_minor >= 1) { + rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes)); + fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH, + fw_data, fw_size); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes)); + fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM, + fw_data, fw_size); + } + if (version_minor >= 2) { + rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); + fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE, + fw_data, fw_size); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); + fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, 
SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT, + fw_data, fw_size); + } + } +} + +static void +gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + uint32_t fw_size; + const struct sdma_firmware_header_v3_0 *sdma_hdr; + + sdma_hdr = (const struct sdma_firmware_header_v3_0 *) + adev->sdma.instance[0].fw->data; + fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + + le32_to_cpu(sdma_hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes); + + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0, + fw_data, fw_size); +} + +static void +gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + unsigned fw_size; + const struct mes_firmware_header_v1_0 *mes_hdr; + int pipe, ucode_id, data_id; + + for (pipe = 0; pipe < 2; pipe++) { + if (pipe == 0) { + ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0; + data_id = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK; + } else { + ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1; + data_id = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK; + } + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size); + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + + gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size); + } +} + +static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) +{ + uint32_t rlc_g_offset, rlc_g_size; + uint64_t gpu_addr; + uint32_t data; + + /* RLC autoload sequence 2: copy ucode */ + gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev); + gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev); + gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev); + gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev); + + rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset; + rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size; + gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start; + + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); + + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); + + if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { + /* RLC autoload sequence 3: load IMU fw */ + if (adev->gfx.imu.funcs->load_microcode) + adev->gfx.imu.funcs->load_microcode(adev); + /* RLC autoload sequence 4 init IMU fw */ + if (adev->gfx.imu.funcs->setup_imu) + adev->gfx.imu.funcs->setup_imu(adev); + if (adev->gfx.imu.funcs->start_imu) + adev->gfx.imu.funcs->start_imu(adev); + + /* RLC autoload sequence 5 disable gpa mode */ + gfx_v12_0_disable_gpa_mode(adev); + } else { + /* unhalt rlc to start autoload without imu */ + data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); + data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1); + data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); + WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data); + WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK); + } + + return 0; +} + +static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t 
reg_count = ARRAY_SIZE(gc_reg_list_12_0); + uint32_t *ptr; + uint32_t inst; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_12); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } + + /* Allocate memory for gfx queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12); + inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); + adev->gfx.ip_dump_gfx_queues = NULL; + } else { + adev->gfx.ip_dump_gfx_queues = ptr; + } +} + +static int gfx_v12_0_sw_init(void *handle) +{ + int i, j, k, r, ring_id = 0; + unsigned num_compute_rings; + int xcc_id = 0; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 2; + adev->gfx.mec.num_queue_per_pipe = 4; + break; + default: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + } + + /* recalculate compute rings to use based on hardware configuration */ + num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe) / 2; + adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, + num_compute_rings); + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, + &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + gfx_v12_0_me_init(adev); + + r = gfx_v12_0_rlc_init(adev); + if (r) { + dev_err(adev->dev, "Failed to init rlc BOs!\n"); + return r; + } + + r = gfx_v12_0_mec_init(adev); + if (r) { + dev_err(adev->dev, "Failed to init MEC BOs!\n"); + return r; + } + + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v12_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } + } + } + + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < 
adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, + 0, i, k, j)) + continue; + + r = gfx_v12_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } + } + } + + if (!adev->enable_mes_kiq) { + r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0); + if (r) { + dev_err(adev->dev, "Failed to init KIQ BOs!\n"); + return r; + } + + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); + if (r) + return r; + } + + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0); + if (r) + return r; + + /* allocate visible FB for rlc auto-loading fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v12_0_rlc_autoload_buffer_init(adev); + if (r) + return r; + } + + r = gfx_v12_0_gpu_early_init(adev); + if (r) + return r; + + gfx_v12_0_alloc_ip_dump(adev); + + return 0; +} + +static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, + &adev->gfx.pfp.pfp_fw_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_ptr); + + amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, + &adev->gfx.pfp.pfp_fw_data_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_data_ptr); +} + +static void gfx_v12_0_me_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, + &adev->gfx.me.me_fw_gpu_addr, + (void **)&adev->gfx.me.me_fw_ptr); + + amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, + &adev->gfx.me.me_fw_data_gpu_addr, + (void **)&adev->gfx.me.me_fw_data_ptr); +} + +static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); +} + +static int gfx_v12_0_sw_fini(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + amdgpu_gfx_mqd_sw_fini(adev, 0); + + if (!adev->enable_mes_kiq) { + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); + } + + gfx_v12_0_pfp_fini(adev); + gfx_v12_0_me_fini(adev); + gfx_v12_0_rlc_fini(adev); + gfx_v12_0_mec_fini(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + gfx_v12_0_rlc_autoload_buffer_fini(adev); + + gfx_v12_0_free_microcode(adev); + + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); + kfree(adev->gfx.ip_dump_gfx_queues); + + return 0; +} + +static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance, int xcc_id) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); + + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); +} + +static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev) +{ + u32 gc_disabled_sa_mask, 
gc_user_disabled_sa_mask, sa_mask; + + gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE); + gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, + GRBM_CC_GC_SA_UNIT_DISABLE, + SA_DISABLE); + gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE); + gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, + GRBM_GC_USER_SA_UNIT_DISABLE, + SA_DISABLE); + sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * + adev->gfx.config.max_shader_engines); + + return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); +} + +static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev) +{ + u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; + u32 rb_mask; + + gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); + gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, + CC_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); + gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, + GC_USER_RB_BACKEND_DISABLE, + BACKEND_DISABLE); + rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * + adev->gfx.config.max_shader_engines); + + return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); +} + +static void gfx_v12_0_setup_rb(struct amdgpu_device *adev) +{ + u32 rb_bitmap_width_per_sa; + u32 max_sa; + u32 active_sa_bitmap; + u32 global_active_rb_bitmap; + u32 active_rb_bitmap = 0; + u32 i; + + /* query sa bitmap from SA_UNIT_DISABLE registers */ + active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev); + /* query rb bitmap from RB_BACKEND_DISABLE registers */ + global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev); + + /* generate active rb bitmap according to active sa bitmap */ + max_sa = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se; + rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + for (i = 0; i < max_sa; i++) { + if (active_sa_bitmap & (1 << i)) + active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); + } + + active_rb_bitmap |= global_active_rb_bitmap; + adev->gfx.config.backend_enable_mask = active_rb_bitmap; + adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); +} + +#define LDS_APP_BASE 0x1 +#define SCRATCH_APP_BASE 0x2 + +static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_bases; + uint32_t data; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | + SCRATCH_APP_BASE; + + mutex_lock(&adev->srbm_mutex); + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + soc24_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); + + /* Enable trap for each kfd vmid. */ + data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev) +{ + /* TODO: harvest feature to be added later. 
*/ +} + +static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev) +{ +} + +static void gfx_v12_0_constants_init(struct amdgpu_device *adev) +{ + u32 tmp; + int i; + + if (!amdgpu_sriov_vf(adev)) + WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + + gfx_v12_0_setup_rb(adev); + gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v12_0_get_tcc_info(adev); + adev->gfx.config.pa_sc_tile_steering_override = 0; + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { + soc24_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + if (i != 0) { + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); + WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); + } + } + soc24_grbm_select(adev, 0, 0, 0, 0); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v12_0_init_compute_vmid(adev); +} + +static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + if (amdgpu_sriov_vf(adev)) + return; + + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + + WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); +} + +static int gfx_v12_0_init_csb(struct amdgpu_device *adev) +{ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); + + WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + + return 0; +} + +static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); + + tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); + WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); +} + +static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev) +{ + WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); +} + +static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, + bool enable) +{ + uint32_t rlc_pg_cntl; + + rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); + + if (!enable) { + /* RLC_PG_CNTL[23] = 0 (default) + * RLC will wait for handshake acks with SMU + * GFXOFF will be enabled + * RLC_PG_CNTL[23] = 1 + * RLC will not issue any message to SMU + * hence no handshake between SMU & RLC + * GFXOFF will be disabled + */ + rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; + } else + rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; + WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); +} + +static void gfx_v12_0_rlc_start(struct amdgpu_device *adev) +{ + /* TODO: keep the RLC/SMU handshake disabled until the SMU + * and GFXOFF features work as expected */ + if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) + gfx_v12_0_rlc_smu_handshake_cntl(adev, false); + + WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); +} +
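/*
 * Editor's sketch, not part of the patch: gfx_v12_0_rlc_stop(),
 * gfx_v12_0_rlc_smu_handshake_cntl() and gfx_v12_0_rlc_start() above all
 * rely on the same SOC15 read-modify-write idiom -- read the register,
 * rewrite a single field with REG_SET_FIELD(), write it back (the
 * WREG32_FIELD15_PREREG() macro used in gfx_v12_0_rlc_start() performs an
 * equivalent sequence in one step). A minimal illustration folding the
 * start/stop halves into one helper; rlc_set_f32() is a hypothetical name
 * used only for this example:
 */
static void rlc_set_f32(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);

	/* touch only the RLC_ENABLE_F32 field, preserving all other bits */
	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, enable ? 1 : 0);
	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
}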
+static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* enable Save Restore Machine */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); +} + +static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, + RLCG_UCODE_LOADING_START_ADDRESS); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, + le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); +} + +static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_2 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + u32 tmp; + + hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); + fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); + + for (i = 0; i < fw_size; i++) { + if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) + msleep(1); + WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, + le32_to_cpup(fw_data++)); + } + + WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); + fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); + for (i = 0; i < fw_size; i++) { + if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) + msleep(1); + WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, + le32_to_cpup(fw_data++)); + } + + WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); + + tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); + tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); + WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); +} + +static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + uint16_t version_major; + uint16_t version_minor; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + version_major = le16_to_cpu(hdr->header.header_version_major); + version_minor = le16_to_cpu(hdr->header.header_version_minor); + + if (version_major == 2) { + gfx_v12_0_load_rlcg_microcode(adev); + if (amdgpu_dpm == 1) { + if (version_minor >= 2) + gfx_v12_0_load_rlc_iram_dram_microcode(adev); + } + + return 0; + } + + return -EINVAL; +} + +static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev) +{ + int r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + gfx_v12_0_init_csb(adev); + + if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ + gfx_v12_0_rlc_enable_srm(adev); + } else { + if (amdgpu_sriov_vf(adev)) { + gfx_v12_0_init_csb(adev); + return 0; + } + + adev->gfx.rlc.funcs->stop(adev); + + /* disable CG */ + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); + + /* disable PG */ + 
WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy rlc firmware loading */ + r = gfx_v12_0_rlc_load_microcode(adev); + if (r) + return r; + } + + gfx_v12_0_init_csb(adev); + + adev->gfx.rlc.funcs->start(adev); + } + + return 0; +} + +static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v2_0 *pfp_hdr; + const struct gfx_firmware_header_v2_0 *me_hdr; + const struct gfx_firmware_header_v2_0 *mec_hdr; + uint32_t pipe_id, tmp; + + mec_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + me_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + pfp_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + + /* config pfp program start addr */ + for (pipe_id = 0; pipe_id < 2; pipe_id++) { + soc24_grbm_select(adev, 0, pipe_id, 0, 0); + WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, + (pfp_hdr->ucode_start_addr_hi << 30) | + (pfp_hdr->ucode_start_addr_lo >> 2)); + WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, + pfp_hdr->ucode_start_addr_hi >> 2); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + + /* reset pfp pipe */ + tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + + /* clear pfp pipe reset */ + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + + /* config me program start addr */ + for (pipe_id = 0; pipe_id < 2; pipe_id++) { + soc24_grbm_select(adev, 0, pipe_id, 0, 0); + WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, + (me_hdr->ucode_start_addr_hi << 30) | + (me_hdr->ucode_start_addr_lo >> 2)); + WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, + me_hdr->ucode_start_addr_hi>>2); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + + /* reset me pipe */ + tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + + /* clear me pipe reset */ + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + + /* config mec program start addr */ + for (pipe_id = 0; pipe_id < 4; pipe_id++) { + soc24_grbm_select(adev, 1, pipe_id, 0, 0); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, + mec_hdr->ucode_start_addr_lo >> 2 | + mec_hdr->ucode_start_addr_hi << 30); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, + mec_hdr->ucode_start_addr_hi >> 2); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + + /* reset mec pipe */ + tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); + + /* clear mec pipe reset */ + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); +} + 
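The (hi << 30) | (lo >> 2) packing used for the *_PRGRM_CNTR_START registers in gfx_v12_0_config_gfx_rs64() above is a 64-bit byte address converted to a dword-granular program counter and split across two 32-bit registers. A stand-alone sketch of that identity follows; the header values are made up for illustration (the real ones come from the gfx_firmware_header_v2_0 fields):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* made-up stand-ins for ucode_start_addr_lo/_hi from the fw header */
	uint32_t start_lo = 0x00001000;
	uint32_t start_hi = 0x00000001;

	/* what the driver programs into *_PRGRM_CNTR_START and ..._HI */
	uint32_t reg_lo = (start_hi << 30) | (start_lo >> 2);
	uint32_t reg_hi = start_hi >> 2;

	/* equivalent view: shift the whole 64-bit byte address right by 2 */
	uint64_t byte_addr = ((uint64_t)start_hi << 32) | start_lo;
	uint64_t dw_addr = byte_addr >> 2;

	printf("reg_lo=0x%08x low32(dw_addr)=0x%08x\n", reg_lo, (uint32_t)dw_addr);
	printf("reg_hi=0x%08x high32(dw_addr)=0x%08x\n", reg_hi, (uint32_t)(dw_addr >> 32));
	return 0;
}

Both pairs print identical values: the low register carries bits [33:2] of the byte address and the HI register the remainder, which is why the driver can compute them from the two 32-bit header words without ever forming the 64-bit address.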
+static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v2_0 *cp_hdr; + unsigned pipe_id, tmp; + + cp_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + mutex_lock(&adev->srbm_mutex); + for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { + soc24_grbm_select(adev, 0, pipe_id, 0, 0); + WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, + (cp_hdr->ucode_start_addr_hi << 30) | + (cp_hdr->ucode_start_addr_lo >> 2)); + WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, + cp_hdr->ucode_start_addr_hi >> 2); + + /* + * Program CP_ME_CNTL to reset given PIPE to take + * effect of CP_PFP_PRGRM_CNTR_START. + */ + tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); + if (pipe_id == 0) + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + else + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + + /* Clear pfp pipe reset bit. */ + if (pipe_id == 0) + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + else + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v2_0 *cp_hdr; + unsigned pipe_id, tmp; + + cp_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + mutex_lock(&adev->srbm_mutex); + for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { + soc24_grbm_select(adev, 0, pipe_id, 0, 0); + WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, + (cp_hdr->ucode_start_addr_hi << 30) | + (cp_hdr->ucode_start_addr_lo >> 2)); + WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, + cp_hdr->ucode_start_addr_hi >> 2); + + /* + * Program CP_ME_CNTL to reset given PIPE to take + * effect of CP_ME_PRGRM_CNTR_START. + */ + tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); + if (pipe_id == 0) + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + else + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + + /* Clear me pipe reset bit. 
*/ + if (pipe_id == 0) + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + else + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v2_0 *cp_hdr; + unsigned pipe_id; + + cp_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + mutex_lock(&adev->srbm_mutex); + for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) { + soc24_grbm_select(adev, 1, pipe_id, 0, 0); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, + cp_hdr->ucode_start_addr_lo >> 2 | + cp_hdr->ucode_start_addr_hi << 30); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, + cp_hdr->ucode_start_addr_hi >> 2); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) +{ + uint32_t cp_status; + uint32_t bootload_status; + int i; + + for (i = 0; i < adev->usec_timeout; i++) { + cp_status = RREG32_SOC15(GC, 0, regCP_STAT); + bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); + + if ((cp_status == 0) && + (REG_GET_FIELD(bootload_status, + RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { + break; + } + udelay(1); + if (amdgpu_emu_mode) + msleep(10); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); + return -ETIMEDOUT; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + gfx_v12_0_set_pfp_ucode_start_addr(adev); + gfx_v12_0_set_me_ucode_start_addr(adev); + gfx_v12_0_set_mec_ucode_start_addr(adev); + } + + return 0; +} + +static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); + + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); + + for (i = 0; i < adev->usec_timeout; i++) { + if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("failed to %s cp gfx\n", enable ? 
"unhalt" : "halt"); + + return 0; +} + +static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v2_0 *pfp_hdr; + const __le32 *fw_ucode, *fw_data; + unsigned i, pipe_id, fw_ucode_size, fw_data_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + pfp_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + + amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + + /* instruction */ + fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(pfp_hdr->ucode_offset_bytes)); + fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); + /* data */ + fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(pfp_hdr->data_offset_bytes)); + fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); + + /* 64kb align */ + r = amdgpu_bo_create_reserved(adev, fw_ucode_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.pfp.pfp_fw_obj, + &adev->gfx.pfp.pfp_fw_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); + gfx_v12_0_pfp_fini(adev); + return r; + } + + r = amdgpu_bo_create_reserved(adev, fw_data_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.pfp.pfp_fw_data_obj, + &adev->gfx.pfp.pfp_fw_data_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_data_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); + gfx_v12_0_pfp_fini(adev); + return r; + } + + memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); + memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); + + amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); + amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); + amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); + amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); + + if (amdgpu_emu_mode == 1) + adev->hdp.funcs->flush_hdp(adev, NULL); + + WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, + lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); + WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, + upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); + + tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); + WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); + + /* + * Programming any of the CP_PFP_IC_BASE registers + * forces invalidation of the ME L1 I$. 
Wait for the + * invalidation complete + */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Prime the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); + /* Waiting for cache primed*/ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, + ICACHE_PRIMED)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to prime instruction cache\n"); + return -EINVAL; + } + + mutex_lock(&adev->srbm_mutex); + for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { + soc24_grbm_select(adev, 0, pipe_id, 0, 0); + + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, + lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, + upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); + + /* Invalidate the data caches */ + tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); + + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, + INVALIDATE_DCACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); + return -EINVAL; + } + + gfx_v12_0_set_pfp_ucode_start_addr(adev); + + return 0; +} + +static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v2_0 *me_hdr; + const __le32 *fw_ucode, *fw_data; + unsigned i, pipe_id, fw_ucode_size, fw_data_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + me_hdr = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + + amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + + /* instruction */ + fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(me_hdr->ucode_offset_bytes)); + fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); + /* data */ + fw_data = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(me_hdr->data_offset_bytes)); + fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); + + /* 64kb align*/ + r = amdgpu_bo_create_reserved(adev, fw_ucode_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.me.me_fw_obj, + &adev->gfx.me.me_fw_gpu_addr, + (void **)&adev->gfx.me.me_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); + gfx_v12_0_me_fini(adev); + return r; + } + + r = amdgpu_bo_create_reserved(adev, fw_data_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.me.me_fw_data_obj, + &adev->gfx.me.me_fw_data_gpu_addr, + (void **)&adev->gfx.me.me_fw_data_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create me data 
bo\n", r); + gfx_v12_0_me_fini(adev); + return r; + } + + memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); + memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); + + amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); + amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); + amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); + amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); + + if (amdgpu_emu_mode == 1) + adev->hdp.funcs->flush_hdp(adev, NULL); + + WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, + lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); + WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, + upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); + + tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); + WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); + + /* + * Programming any of the CP_ME_IC_BASE registers + * forces invalidation of the ME L1 I$. Wait for the + * invalidation complete + */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Prime the instruction caches */ + tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); + + /* Waiting for instruction cache primed*/ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, + ICACHE_PRIMED)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to prime instruction cache\n"); + return -EINVAL; + } + + mutex_lock(&adev->srbm_mutex); + for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { + soc24_grbm_select(adev, 0, pipe_id, 0, 0); + + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, + lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, + upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); + + /* Invalidate the data caches */ + tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); + WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); + + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, + INVALIDATE_DCACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); + return -EINVAL; + } + + gfx_v12_0_set_me_ucode_start_addr(adev); + + return 0; +} + +static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) + return -EINVAL; + + gfx_v12_0_cp_gfx_enable(adev, false); + + r = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); + return r; + } + + r = 
gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load me fw\n", r); + return r; + } + + return 0; +} + +static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev) +{ + /* init the CP */ + WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, + adev->gfx.config.max_hw_contexts - 1); + WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); + + if (!amdgpu_async_gfx_ring) + gfx_v12_0_cp_gfx_enable(adev, true); + + return 0; +} + +static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, + CP_PIPE_ID pipe) +{ + u32 tmp; + + tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); + + WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); +} + +static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + u32 tmp; + + tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); + + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, ring->doorbell_index); + WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); +} + +static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 tmp; + u32 rb_bufsz; + u64 rb_addr, rptr_addr, wptr_gpu_addr; + u32 i; + + /* Set the write pointer delay */ + WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); + + /* set the RB to use vmid 0 */ + WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); + + /* Init gfx ring 0 for pipe 0 */ + mutex_lock(&adev->srbm_mutex); + gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0); + + /* Set ring buffer size */ + ring = &adev->gfx.gfx_ring[0]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); + WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); + + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + + /* set the wb address whether it's enabled or not */ + rptr_addr = ring->rptr_gpu_addr; + WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & + CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); + WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); + + WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); + + gfx_v12_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); + + /* Switch to pipe 0 */ + mutex_lock(&adev->srbm_mutex); + gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0); + mutex_unlock(&adev->srbm_mutex); + + /* start the ring */ + gfx_v12_0_cp_gfx_start(adev); + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + 
ring->sched.ready = true; + } + + return 0; +} + +static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) +{ + u32 data; + + data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, + enable ? 0 : 1); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, + enable ? 1 : 0); + data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, + enable ? 0 : 1); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); + + adev->gfx.kiq[0].ring.sched.ready = enable; + + udelay(50); +} + +static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v2_0 *mec_hdr; + const __le32 *fw_ucode, *fw_data; + u32 tmp, fw_ucode_size, fw_data_size; + u32 i, usec_timeout = 50000; /* Wait for 50 ms */ + u32 *fw_ucode_ptr, *fw_data_ptr; + int r; + + if (!adev->gfx.mec_fw) + return -EINVAL; + + gfx_v12_0_cp_compute_enable(adev, false); + + mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->ucode_offset_bytes)); + fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); + + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->data_offset_bytes)); + fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_ucode_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw_ucode_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); + gfx_v12_0_mec_fini(adev); + return r; + } + + r = amdgpu_bo_create_reserved(adev, + ALIGN(fw_data_size, 64 * 1024) * + adev->gfx.mec.num_pipe_per_mec, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.mec_fw_data_obj, + &adev->gfx.mec.mec_fw_data_gpu_addr, + (void **)&fw_data_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r); + gfx_v12_0_mec_fini(adev); + return r; + } + + memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size); + } + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); + + tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); + + tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); + 
WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); + + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + soc24_grbm_select(adev, 1, i, 0, 0); + + WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, + lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + + i * ALIGN(fw_data_size, 64 * 1024))); + WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + + i * ALIGN(fw_data_size, 64 * 1024))); + + WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, + lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + } + mutex_unlock(&adev->srbm_mutex); + soc24_grbm_select(adev, 0, 0, 0, 0); + + /* Trigger an invalidation of the MEC L1 data cache */ + tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); + WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, + INVALIDATE_DCACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); + return -EINVAL; + } + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + gfx_v12_0_set_mec_ucode_start_addr(adev); + + return 0; +} + +static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); +} + +static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev) +{ + /* set graphics engine doorbell range */ + WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.gfx_ring0 * 2) << 2); + WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, + (adev->doorbell_index.gfx_userqueue_end * 2) << 2); + + /* set compute engine doorbell range */ + WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); + WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell_index.userqueue_end * 2) << 2); +} + +static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, + struct amdgpu_mqd_prop *prop) +{ + struct v12_gfx_mqd *mqd = m; + uint64_t hqd_gpu_addr, wb_gpu_addr; + uint32_t tmp; + uint32_t rb_bufsz; + + /* set up gfx hqd wptr */ + mqd->cp_gfx_hqd_wptr = 0; + mqd->cp_gfx_hqd_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); + + /* set up mqd control */ + tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); + tmp = 
REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); + mqd->cp_gfx_mqd_control = tmp; + + /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */ + tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); + mqd->cp_gfx_hqd_vmid = 0; + + /* set up default queue priority level + * 0x0 = low priority, 0x1 = high priority */ + tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); + mqd->cp_gfx_hqd_queue_priority = tmp; + + /* set up time quantum */ + tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); + mqd->cp_gfx_hqd_quantum = tmp; + + /* set up gfx hqd base. this is similar as CP_RB_BASE */ + hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; + mqd->cp_gfx_hqd_base = hqd_gpu_addr; + mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ + wb_gpu_addr = prop->rptr_gpu_addr; + mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; + mqd->cp_gfx_hqd_rptr_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up rb_wptr_poll addr */ + wb_gpu_addr = prop->wptr_gpu_addr; + mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ + rb_bufsz = order_base_2(prop->queue_size / 4) - 1; + tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); +#endif + mqd->cp_gfx_hqd_cntl = tmp; + + /* set up cp_doorbell_control */ + tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + if (prop->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, prop->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + mqd->cp_rb_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + + /* activate the queue */ + mqd->cp_gfx_hqd_active = 1; + + return 0; +} + +static int gfx_v12_0_gfx_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v12_gfx_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.gfx_ring[0]; + + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + amdgpu_ring_init_mqd(ring); + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else { + /* restore mqd with the backup copy */ + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + /* reset the ring */ + ring->wptr = 0; + *ring->wptr_cpu_addr = 0; + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + + r = 
amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v12_0_gfx_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + + r = amdgpu_gfx_enable_kgq(adev, 0); + if (r) + goto done; + + r = gfx_v12_0_cp_gfx_start(adev); + if (r) + goto done; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->sched.ready = true; + } +done: + return r; +} + +static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, + struct amdgpu_mqd_prop *prop) +{ + struct v12_compute_mqd *mqd = m; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000007; + + eop_base_addr = prop->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + + if (prop->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, prop->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(prop->queue_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = prop->rptr_gpu_addr; + 
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = prop->wptr_gpu_addr; + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + tmp = 0; + /* enable the doorbell if requested */ + if (prop->use_doorbell) { + tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, prop->doorbell_index); + + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); + mqd->cp_hqd_persistent_state = tmp; + + /* set MIN_IB_AVAIL_SIZE */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + mqd->cp_hqd_ib_control = tmp; + + /* set static priority for a compute queue/ring */ + mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; + mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; + + mqd->cp_hqd_active = prop->hqd_active; + + return 0; +} + +static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v12_compute_mqd *mqd = ring->mqd_ptr; + int j; + + /* inactivate the queue */ + if (amdgpu_sriov_vf(adev)) + WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); + + /* disable wptr polling */ + WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); + + /* write the EOP addr */ + WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + + /* enable doorbell? 
*/ + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, + mqd->cp_hqd_dequeue_request); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, + mqd->cp_hqd_pq_rptr); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, + mqd->cp_mqd_base_addr_lo); + WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, + mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, + mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, + mqd->cp_hqd_pq_base_lo); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, + mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, + mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); + WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell_index.userqueue_end * 2) << 2); + } + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); + + WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, + mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, + mqd->cp_hqd_active); + + if (ring->use_doorbell) + WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + +static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v12_compute_mqd *mqd = ring->mqd_ptr; + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; + + gfx_v12_0_kiq_setting(ring); + + if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v12_0_kiq_init_register(ring); + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(*mqd)); + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + 
amdgpu_ring_clear_ring(ring); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + amdgpu_ring_init_mqd(ring); + gfx_v12_0_kiq_init_register(ring); + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } + + return 0; +} + +static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v12_compute_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + amdgpu_ring_init_mqd(ring); + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else { + /* restore MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + /* reset ring buffer */ + ring->wptr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r; + + ring = &adev->gfx.kiq[0].ring; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(ring->mqd_obj); + return r; + } + + gfx_v12_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + amdgpu_bo_unreserve(ring->mqd_obj); + ring->sched.ready = true; + return 0; +} + +static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + if (!amdgpu_async_gfx_ring) + gfx_v12_0_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v12_0_kcq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + + r = amdgpu_gfx_enable_kcq(adev, 0); +done: + return r; +} + +static int gfx_v12_0_cp_resume(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + if (!(adev->flags & AMD_IS_APU)) + gfx_v12_0_enable_gui_idle_interrupt(adev, false); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy firmware loading */ + r = gfx_v12_0_cp_gfx_load_microcode(adev); + if (r) + return r; + + r = gfx_v12_0_cp_compute_load_microcode_rs64(adev); + if (r) + return r; + } + + gfx_v12_0_cp_set_doorbell_range(adev); + + if (amdgpu_async_gfx_ring) { + gfx_v12_0_cp_compute_enable(adev, true); + gfx_v12_0_cp_gfx_enable(adev, true); + } + + if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) + r = amdgpu_mes_kiq_hw_init(adev); + else + r = gfx_v12_0_kiq_resume(adev); + if (r) + return r; + + r = gfx_v12_0_kcq_resume(adev); + if (r) + return r; + + if (!amdgpu_async_gfx_ring) { + r = gfx_v12_0_cp_gfx_resume(adev); + if (r) + return r; + } else { + r = gfx_v12_0_cp_async_gfx_ring_resume(adev); + if (r) + return r; + } + + for (i = 0; i < 
adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + return 0; +} + +static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable) +{ + gfx_v12_0_cp_gfx_enable(adev, enable); + gfx_v12_0_cp_compute_enable(adev, enable); +} + +static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) +{ + int r; + bool value; + + r = adev->gfxhub.funcs->gart_enable(adev); + if (r) + return r; + + adev->hdp.funcs->flush_hdp(adev, NULL); + + value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? + false : true; + + adev->gfxhub.funcs->set_fault_enable_default(adev, value); + /* TODO investigate why this and the hdp flush above is needed, + * are we missing a flush somewhere else? */ + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); + + return 0; +} + +static int get_gb_addr_config(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); + if (gb_addr_config == 0) + return -EINVAL; + + adev->gfx.config.gb_addr_config_fields.num_pkrs = + 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_PIPES); + + adev->gfx.config.max_tile_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); + + return 0; +} + +static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); + data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); + + data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); + data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); +} + +static int gfx_v12_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { + /* RLC autoload sequence 1: Program rlc ram */ + if (adev->gfx.imu.funcs->program_rlc_ram) + adev->gfx.imu.funcs->program_rlc_ram(adev); + } + /* rlc autoload firmware */ + r = gfx_v12_0_rlc_backdoor_autoload_enable(adev); + if (r) + return r; + } else { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { + if (adev->gfx.imu.funcs->load_microcode) + adev->gfx.imu.funcs->load_microcode(adev); + if (adev->gfx.imu.funcs->setup_imu) + adev->gfx.imu.funcs->setup_imu(adev); + if (adev->gfx.imu.funcs->start_imu) + adev->gfx.imu.funcs->start_imu(adev); + } + + /* disable gpa mode in backdoor loading */ + gfx_v12_0_disable_gpa_mode(adev); + } 
+ } + + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + r = gfx_v12_0_wait_for_rlc_autoload_complete(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to wait for rlc autoload complete\n", r); + return r; + } + } + + adev->gfx.is_poweron = true; + + if (get_gb_addr_config(adev)) + DRM_WARN("Invalid gb_addr_config!\n"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) + gfx_v12_0_config_gfx_rs64(adev); + + r = gfx_v12_0_gfxhub_enable(adev); + if (r) + return r; + + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT || + adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) && + (amdgpu_dpm == 1)) { + /* + * For gfx 12, rlc firmware loading relies on the smu firmware + * being loaded first, so for the direct loading type, the smc + * ucode has to be loaded here before the rlc. + */ + r = amdgpu_pm_load_smu_firmware(adev, NULL); + if (r) + return r; + } + + gfx_v12_0_constants_init(adev); + + if (adev->nbio.funcs->gc_doorbell_init) + adev->nbio.funcs->gc_doorbell_init(adev); + + r = gfx_v12_0_rlc_resume(adev); + if (r) + return r; + + /* + * init golden registers and rlc resume may override some registers, + * reconfig them here + */ + gfx_v12_0_tcp_harvest(adev); + + r = gfx_v12_0_cp_resume(adev); + if (r) + return r; + + return r; +} + +static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int i, r = 0; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_gfx_rings)) + return -ENOMEM; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], + PREEMPT_QUEUES, 0, 0); + + if (adev->gfx.kiq[0].ring.sched.ready) + r = amdgpu_ring_test_helper(kiq_ring); + + return r; +} + +static int gfx_v12_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + uint32_t tmp; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + + if (!adev->no_hw_access) { + if (amdgpu_async_gfx_ring) { + r = gfx_v12_0_kiq_disable_kgq(adev); + if (r) + DRM_ERROR("KGQ disable failed\n"); + } + + if (amdgpu_gfx_disable_kcq(adev, 0)) + DRM_ERROR("KCQ disable failed\n"); + + amdgpu_mes_kiq_hw_fini(adev); + } + + if (amdgpu_sriov_vf(adev)) { + gfx_v12_0_cp_gfx_enable(adev, false); + /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ + tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + + return 0; + } + gfx_v12_0_cp_enable(adev, false); + gfx_v12_0_enable_gui_idle_interrupt(adev, false); + + adev->gfxhub.funcs->gart_disable(adev); + + adev->gfx.is_poweron = false; + + return 0; +} + +static int gfx_v12_0_suspend(void *handle) +{ + return gfx_v12_0_hw_fini(handle); +} + +static int gfx_v12_0_resume(void *handle) +{ + return gfx_v12_0_hw_init(handle); +} + +static bool gfx_v12_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), + GRBM_STATUS, GUI_ACTIVE)) + return false; + else + return true; +} + +static int gfx_v12_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + /* read GRBM_STATUS */ + tmp = RREG32_SOC15(GC, 0, 
regGRBM_STATUS) & + GRBM_STATUS__GUI_ACTIVE_MASK; + + if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock = 0; + + if (adev->smuio.funcs && + adev->smuio.funcs->get_gpu_clock_counter) + clock = adev->smuio.funcs->get_gpu_clock_counter(adev); + else + dev_warn(adev->dev, "query gpu clock counter is not supported\n"); + + return clock; +} + +static int gfx_v12_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.funcs = &gfx_v12_0_gfx_funcs; + + adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + + gfx_v12_0_set_kiq_pm4_funcs(adev); + gfx_v12_0_set_ring_funcs(adev); + gfx_v12_0_set_irq_funcs(adev); + gfx_v12_0_set_rlc_funcs(adev); + gfx_v12_0_set_mqd_funcs(adev); + gfx_v12_0_set_imu_funcs(adev); + + gfx_v12_0_init_rlcg_reg_access_ctrl(adev); + + return gfx_v12_0_init_microcode(adev); +} + +static int gfx_v12_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev) +{ + uint32_t rlc_cntl; + + /* if RLC is not enabled, do nothing */ + rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); + return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; +} + +static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + unsigned i; + + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + + WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), + RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } +} + +static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, + int xcc_id) +{ + WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); +} + +static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) + return; + + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); +} + +static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned vmid) +{ + u32 reg, data; + + reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + if (amdgpu_sriov_is_pp_one_vf(adev)) + data = RREG32_NO_KIQ(reg); + else + data = RREG32(reg); + + data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; + + if (amdgpu_sriov_is_pp_one_vf(adev)) + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); + else + WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + + if (ring + && amdgpu_sriov_is_pp_one_vf(adev) + && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) + || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { + uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + amdgpu_ring_emit_wreg(ring, reg, 
data); + } +} + +static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = { + .is_rlc_enabled = gfx_v12_0_is_rlc_enabled, + .set_safe_mode = gfx_v12_0_set_safe_mode, + .unset_safe_mode = gfx_v12_0_unset_safe_mode, + .init = gfx_v12_0_rlc_init, + .get_csb_size = gfx_v12_0_get_csb_size, + .get_csb_buffer = gfx_v12_0_get_csb_buffer, + .resume = gfx_v12_0_rlc_resume, + .stop = gfx_v12_0_rlc_stop, + .reset = gfx_v12_0_rlc_reset, + .start = gfx_v12_0_rlc_start, + .update_spm_vmid = gfx_v12_0_update_spm_vmid, +}; + +#if 0 +static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + /* TODO */ +} + +static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable) +{ + /* TODO */ +} +#endif + +static int gfx_v12_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE); + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + amdgpu_gfx_off_ctrl(adev, enable); + break; + default: + break; + } + + return 0; +} + +static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS))) + return; + + if (enable) { + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + /* unset CGCG override */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || + adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + + /* update CGCG override bits */ + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + + /* enable cgcg FSM(0x0000363F) */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; + data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + } + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + } + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); + + /* Program RLC_CGCG_CGLS_CTRL_3D */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; + data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + } + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; + data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + } + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); + + data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); + data |= 
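The clock-gating helpers above all use the same read-modify-write idiom: latch the register once into both def and data, flip bits in data, and only issue the write when the value actually changed, so the common no-change path costs a single register read. A small sketch of the pattern against a fake register (the names are invented):

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t mmio_reg = 0x0000003f;	/* fake override register */
static unsigned int writes;

static uint32_t rreg(void) { return mmio_reg; }
static void wreg(uint32_t v) { mmio_reg = v; writes++; }

/* def/data pattern: flip bits, write back only when something changed. */
static void update_override(uint32_t mask, int enable)
{
	uint32_t def, data;

	def = data = rreg();
	if (enable)
		data &= ~mask;		/* clear the override: gating allowed */
	else
		data |= mask;

	if (def != data)
		wreg(data);
}

int main(void)
{
	update_override(0x3, 1);	/* changes the value: one MMIO write */
	update_override(0x3, 1);	/* already clear: write elided */
	printf("reg=0x%08x writes=%u\n", mmio_reg, writes);
	return 0;
}
```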
(0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + + if (def != data) + WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); + + data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); + data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); + data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); + data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); + data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); + + data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); + data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); + + /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } + } else { + /* Program RLC_CGCG_CGLS_CTRL */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); + + /* Program RLC_CGCG_CGLS_CTRL_3D */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); + + data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); + data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; + WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); + + /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } + } +} + +static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) + return; + + /* MGCG is disabled by hardware by default */ + if (enable) { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + } + } else { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + } + } +} + +static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) + return; + + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + if 
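The REG_SET_FIELD()/REG_GET_FIELD() calls used throughout this block are thin mask-and-shift macros over the generated register headers, which define a pre-shifted FIELD_MASK and a FIELD__SHIFT per field. A flattened, illustrative reimplementation (the real kernel macros build these names by token pasting; the field below is invented):

```c
#include <stdint.h>
#include <stdio.h>

/* Invented field, following the generated headers' MASK/__SHIFT convention. */
#define CP_INT_CNTL__GFX_IDLE_INT_ENABLE__SHIFT 4
#define CP_INT_CNTL__GFX_IDLE_INT_ENABLE_MASK   (0x1u << 4)

/* Simplified: clear the field, then OR in the shifted, masked value. */
#define REG_SET_FIELD(orig, mask, shift, val) \
	(((orig) & ~(mask)) | (((uint32_t)(val) << (shift)) & (mask)))
#define REG_GET_FIELD(value, mask, shift) \
	(((value) & (mask)) >> (shift))

int main(void)
{
	uint32_t data = 0;

	data = REG_SET_FIELD(data, CP_INT_CNTL__GFX_IDLE_INT_ENABLE_MASK,
			     CP_INT_CNTL__GFX_IDLE_INT_ENABLE__SHIFT, 1);
	printf("data=0x%08x field=%u\n", data,
	       REG_GET_FIELD(data, CP_INT_CNTL__GFX_IDLE_INT_ENABLE_MASK,
			     CP_INT_CNTL__GFX_IDLE_INT_ENABLE__SHIFT));
	return 0;
}
```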
(enable) + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK); + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); +} + +static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); +} + +static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + gfx_v12_0_update_coarse_grain_clock_gating(adev, enable); + + gfx_v12_0_update_medium_grain_clock_gating(adev, enable); + + gfx_v12_0_update_repeater_fgcg(adev, enable); + + gfx_v12_0_update_sram_fgcg(adev, enable); + + gfx_v12_0_update_perf_clk(adev, enable); + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS)) + gfx_v12_0_enable_gui_idle_interrupt(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + + return 0; +} + +static int gfx_v12_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + gfx_v12_0_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + + return 0; +} + +static void gfx_v12_0_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_REPEATER_FGCG */ + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; + + /* AMD_CG_SUPPORT_GFX_FGCG */ + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_FGCG; + + /* AMD_CG_SUPPORT_GFX_PERF_CLK */ + if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; + + /* AMD_CG_SUPPORT_GFX_CGCG */ + data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; + + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; +} + +static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ + /* gfx12 is 32bit rptr*/ + return *(uint32_t *)ring->rptr_cpu_addr; +} + +static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ + struct 
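gfx_v12_0_get_clockgating_state() works purely by readback: a clear override bit (or a set _EN bit) is reported as an active feature flag. A sketch of that decode with invented bit positions, assuming the same clear-means-enabled convention for the override register:

```c
#include <stdint.h>
#include <stdio.h>

#define OVERRIDE_MGCG (1u << 0)		/* invented bit positions */
#define OVERRIDE_FGCG (1u << 1)
#define CTRL_CGCG_EN  (1u << 0)

#define FLAG_MGCG (1u << 0)		/* stand-ins for AMD_CG_SUPPORT_* */
#define FLAG_FGCG (1u << 1)
#define FLAG_CGCG (1u << 2)

/* Override bit clear => feature active; _EN bit set => feature active. */
static uint64_t decode_cg_flags(uint32_t override_reg, uint32_t cgcg_ctrl)
{
	uint64_t flags = 0;

	if (!(override_reg & OVERRIDE_MGCG))
		flags |= FLAG_MGCG;
	if (!(override_reg & OVERRIDE_FGCG))
		flags |= FLAG_FGCG;
	if (cgcg_ctrl & CTRL_CGCG_EN)
		flags |= FLAG_CGCG;
	return flags;
}

int main(void)
{
	/* MGCG active (override clear), FGCG overridden, CGCG enabled */
	printf("flags=0x%llx\n",
	       (unsigned long long)decode_cg_flags(OVERRIDE_FGCG, CTRL_CGCG_EN));
	return 0;
}
```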
amdgpu_device *adev = ring->adev; + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); + } else { + wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); + wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; + } + + return wptr; +} + +static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; + uint64_t wptr_tmp; + + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always being used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } + } else { + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); + } + } +} + +static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + /* gfx12 hardware is 32bit rptr */ + return *(uint32_t *)ring->rptr_cpu_addr; +} + +static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); + else + BUG(); + return wptr; +} + +static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; + uint64_t wptr_tmp; + + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } + } else { + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx12 now */ + } + } +} + +static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask, reg_mem_engine; + const struct nbio_hdp_flush_reg *nbio_hf_reg = 
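In the MES paths of the set_wptr helpers above, the monotonically increasing ring->wptr is reduced with ring->buf_mask before being saved and published through the doorbell; with a power-of-two ring size, the AND is the cheap modulo that maps the pointer back into the buffer. A tiny demonstration with an assumed ring size:

```c
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE_DW 256u		/* power of two, as for the driver's rings */
#define BUF_MASK (RING_SIZE_DW - 1)

int main(void)
{
	uint64_t wptr;

	/* The pointer keeps growing; the masked value wraps inside the ring. */
	for (wptr = 254; wptr < 259; wptr++)
		printf("wptr=%llu -> slot %llu\n",
		       (unsigned long long)wptr,
		       (unsigned long long)(wptr & BUF_MASK));
	return 0;
}
```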
adev->nbio.hdp_flush_reg; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + switch (ring->me) { + case 1: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + break; + case 2: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; + break; + default: + return; + } + reg_mem_engine = 0; + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + reg_mem_engine = 1; /* pfp */ + } + + gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), + ref_and_mask, ref_and_mask, 0x20); +} + +static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 header, control = 0; + + BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); + + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + control |= ib->length_dw | (vmid << 24); + + if (ring->is_mes_queue) + /* inherit vmid from mqd */ + control |= 0x400000; + + amdgpu_ring_write(ring, header); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + + if (ring->is_mes_queue) + /* inherit vmid from mqd */ + control |= 0x40000000; + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | + PACKET3_RELEASE_MEM_GCR_GL2_WB | + PACKET3_RELEASE_MEM_CACHE_POLICY(3) | + PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + PACKET3_RELEASE_MEM_EVENT_INDEX(5))); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | + PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, ring->is_mes_queue ? 
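gfx_v12_0_ring_emit_fence() is, mechanically, a fixed sequence of dwords pushed onto the ring: a PM4 type-3 header, two control words, the qword-aligned fence address, and the 64-bit sequence number. A self-contained sketch of that emission, assuming the PACKET3() header layout ((type 3 << 30) | (count << 16) | (opcode << 8)) and the RELEASE_MEM opcode 0x49 from the soc15 packet headers; the control words are simplified:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* PM4 type-3 header, per the PACKET3() macro: count = body dwords - 1. */
#define PM4_TYPE3(op, count) \
	((3u << 30) | (((count) & 0x3fffu) << 16) | (((op) & 0xffu) << 8))
#define OP_RELEASE_MEM 0x49

static uint32_t ring[16];
static unsigned int wptr;

static void ring_write(uint32_t dw) { ring[wptr++] = dw; }

static void emit_fence(uint64_t addr, uint64_t seq, int write64bit)
{
	/* 64-bit data needs a qword-aligned address, 32-bit dword-aligned. */
	assert(!(addr & (write64bit ? 0x7 : 0x3)));

	ring_write(PM4_TYPE3(OP_RELEASE_MEM, 6));
	ring_write(0);			/* event/cache-control word (elided) */
	ring_write(write64bit ? 2 : 1);	/* DATA_SEL, simplified */
	ring_write((uint32_t)addr);
	ring_write((uint32_t)(addr >> 32));
	ring_write((uint32_t)seq);
	ring_write((uint32_t)(seq >> 32));
	ring_write(0);			/* MES queue word; 0 for kernel rings */
}

int main(void)
{
	emit_fence(0x100008, 42, 1);
	printf("header=0x%08x, %u dwords emitted\n", ring[0], wptr);
	return 0;
}
```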
+ (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); +} + +static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), + upper_32_bits(addr), seq, 0xffffffff, 4); +} + +static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint8_t dst_sel) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + if (ring->is_mes_queue) + gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); + else + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* compute doesn't have PFP */ + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); + } +} + +static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + struct amdgpu_device *adev = ring->adev; + + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + +static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, + uint32_t flags) +{ + uint32_t dw2 = 0; + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs for GFX */ + dw2 |= 0x10002; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + +static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); + ret = ring->wptr & ring->buf_mask; + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); + + return ret; +} + +static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring 
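gfx_v12_0_ring_emit_init_cond_exec() reserves a dummy dword and returns its ring offset so the caller can patch in the size of the conditionally executed span once that span has been emitted. The same reserve-then-patch scheme in miniature (the header constant assumes opcode 0x88 for COND_EXEC and is illustrative only):

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t ring[32];
static unsigned int wptr;

static void ring_write(uint32_t dw) { ring[wptr++] = dw; }

/* Emit the COND_EXEC preamble; return the slot of the count placeholder. */
static unsigned int init_cond_exec(uint64_t cond_addr)
{
	unsigned int patch;

	ring_write(0xc0038800);		/* PACKET3(COND_EXEC, 3), assumed encoding */
	ring_write((uint32_t)cond_addr);
	ring_write((uint32_t)(cond_addr >> 32));
	ring_write(0);			/* skip the payload if *cond_addr == 0 */
	patch = wptr;
	ring_write(0);			/* dummy count, patched below */
	return patch;
}

int main(void)
{
	unsigned int patch = init_cond_exec(0x2000);
	unsigned int start = wptr;

	ring_write(0x12345678);		/* conditionally executed payload */
	ring_write(0x9abcdef0);

	ring[patch] = wptr - start;	/* patch in the real dword count */
	printf("slot %u patched to %u\n", patch, ring[patch]);
	return 0;
}
```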
*kiq_ring = &kiq->ring; + unsigned long flags; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* assert IB preemption, emit the trailing fence */ + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, + ring->trail_fence_gpu_addr, + ++ring->trail_seq); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); + } + + /* deassert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, + bool start, + bool secure) +{ + uint32_t v = secure ? FRAME_TMZ : 0; + + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); +} + +static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t reg_val_offs) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); +} + +static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, + uint32_t val) +{ + uint32_t cmd = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; + break; + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, cmd); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + gfx_v12_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); +} + +static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32_SOC15(GC, 0, regSQ_CMD, value); +} + +static void +gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, + uint32_t me, uint32_t pipe, + enum amdgpu_interrupt_state state) +{ + uint32_t cp_int_cntl, cp_int_cntl_reg; + + if (!me) { + switch (pipe) { + case 0: + cp_int_cntl_reg = 
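gfx_v12_0_ring_preempt_ib() completes the preemption handshake in memory rather than in a register: the KIQ unmap request carries a trailing fence address plus sequence number, and the CPU busy-polls that location until the value lands or usec_timeout expires. A compressed model of the polling side, with the "hardware" completion simulated inside the delay stub:

```c
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static volatile uint32_t trail_fence;	/* CPU-visible fence slot */
static int hw_latency = 3;		/* fence "arrives" after three polls */

static void udelay(unsigned int usec, uint32_t seq)
{
	(void)usec;
	if (--hw_latency == 0)
		trail_fence = seq;	/* simulated KIQ completion */
}

static int wait_trailing_fence(uint32_t seq, unsigned int usec_timeout)
{
	unsigned int i;

	for (i = 0; i < usec_timeout; i++) {
		if (trail_fence == seq)
			return 0;	/* queue preempted */
		udelay(1, seq);
	}
	return -EINVAL;			/* the driver's error on timeout */
}

int main(void)
{
	printf("preempt: %d\n", wait_trailing_fence(7, 100));
	return 0;
}
```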
SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 0); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + GENERIC0_INT_ENABLE, 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 1); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + GENERIC0_INT_ENABLE, 1); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + break; + default: + break; + } +} + +static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, + int me, int pipe, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + GENERIC0_INT_ENABLE, 0); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + GENERIC0_INT_ENABLE, 1); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } +} + +static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (type) { + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: + gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); + break; + case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: + gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state); + break; + default: + break; + } + return 0; +} + +static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + uint32_t mes_queue_id = entry->src_data[0]; + + DRM_DEBUG("IH: CP EOP\n"); + + if (adev->enable_mes 
&& (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { + struct amdgpu_mes_queue *queue; + + mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; + + spin_lock(&adev->mes.queue_id_lock); + queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); + if (queue) { + DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); + amdgpu_fence_process(queue->ring); + } + spin_unlock(&adev->mes.queue_id_lock); + } else { + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + if (pipe_id == 0) + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + else + amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead + * of per queue. + */ + if ((ring->me == me_id) && + (ring->pipe == pipe_id) && + (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + } + + return 0; +} + +static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
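The non-MES branch above recovers the interrupt source from packed bitfields in entry->ring_id: me in bits 3:2, pipe in bits 1:0, and queue in bits 6:4, which are then matched against each ring. The decode in isolation:

```c
#include <stdint.h>
#include <stdio.h>

struct queue_id { uint8_t me, pipe, queue; };

/* Same masks and shifts the handlers apply to entry->ring_id. */
static struct queue_id decode_ring_id(uint32_t ring_id)
{
	struct queue_id q = {
		.me    = (ring_id & 0x0c) >> 2,
		.pipe  = (ring_id & 0x03) >> 0,
		.queue = (ring_id & 0x70) >> 4,
	};
	return q;
}

int main(void)
{
	struct queue_id q = decode_ring_id(0x25);	/* example payload */

	printf("me=%u pipe=%u queue=%u\n", q.me, q.pipe, q.queue);
	return 0;
}
```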
1 : 0); + break; + default: + break; + } + + return 0; +} + +static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we only enabled 1 gfx queue per pipe for now */ + if (ring->me == me_id && ring->pipe == pipe_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; + } +} + +static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + gfx_v12_0_handle_priv_fault(adev, entry); + return 0; +} + +static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + gfx_v12_0_handle_priv_fault(adev, entry); + return 0; +} + +static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + const unsigned int gcr_cntl = + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); + + /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); + amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ + amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ +} + +static void gfx_v12_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0); + + if (!adev->gfx.ip_dump_core) + return; + + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_12_0[i].reg_name, + adev->gfx.ip_dump_core[i]); + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_12); + drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_12[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); + } + index += reg_count; + } + } + } + + /* print gfx queue 
registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12); + drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", + adev->gfx.me.num_me, + adev->gfx.me.num_pipe_per_me, + adev->gfx.me.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_gfx_queue_reg_list_12[reg].reg_name, + adev->gfx.ip_dump_gfx_queues[index + reg]); + } + index += reg_count; + } + } + } +} + +static void gfx_v12_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0); + + if (!adev->gfx.ip_dump_core) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i])); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_12); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_12[reg])); + } + index += reg_count; + } + } + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + soc24_grbm_select(adev, i, j, k, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_gfx_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_gfx_queue_reg_list_12[reg])); + } + index += reg_count; + } + } + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); +} + +static const struct amd_ip_funcs gfx_v12_0_ip_funcs = { + .name = "gfx_v12_0", + .early_init = gfx_v12_0_early_init, + .late_init = gfx_v12_0_late_init, + .sw_init = gfx_v12_0_sw_init, + .sw_fini = gfx_v12_0_sw_fini, + .hw_init = gfx_v12_0_hw_init, + .hw_fini = gfx_v12_0_hw_fini, + .suspend = gfx_v12_0_suspend, + .resume = gfx_v12_0_resume, + .is_idle = gfx_v12_0_is_idle, + .wait_for_idle = gfx_v12_0_wait_for_idle, + .set_clockgating_state = gfx_v12_0_set_clockgating_state, + .set_powergating_state = gfx_v12_0_set_powergating_state, + .get_clockgating_state = gfx_v12_0_get_clockgating_state, + .dump_ip_state = gfx_v12_ip_dump, + .print_ip_state = gfx_v12_ip_print, +}; + +static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + 
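gfx_v12_ip_dump() and gfx_v12_ip_print() walk mec x pipe x queue and append reg_count register values per instance, so the incremental index += reg_count bookkeeping is equivalent to the closed-form offset ((i * pipes + j) * queues + k) * reg_count into the flat dump buffer. A check of that equivalence with example dimensions:

```c
#include <stdio.h>

#define NUM_MEC   1u	/* example dimensions, not the hardware's */
#define NUM_PIPE  4u
#define NUM_QUEUE 8u
#define REG_COUNT 13u

int main(void)
{
	unsigned int i, j, k, index = 0;

	for (i = 0; i < NUM_MEC; i++)
		for (j = 0; j < NUM_PIPE; j++)
			for (k = 0; k < NUM_QUEUE; k++) {
				unsigned int start =
					((i * NUM_PIPE + j) * NUM_QUEUE + k) * REG_COUNT;

				if (start != index)
					return 1;	/* never happens */
				index += REG_COUNT;	/* as in gfx_v12_ip_dump() */
			}

	printf("dump buffer spans %u dwords\n", index);
	return 0;
}
```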
.support_64bit_ptrs = true, + .secure_submission_supported = true, + .get_rptr = gfx_v12_0_ring_get_rptr_gfx, + .get_wptr = gfx_v12_0_ring_get_wptr_gfx, + .set_wptr = gfx_v12_0_ring_set_wptr_gfx, + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 8, /* gfx_v12_0_emit_mem_sync */ + .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v12_0_ring_emit_ib_gfx, + .emit_fence = gfx_v12_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush, + .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, + .test_ring = gfx_v12_0_ring_test_ring, + .test_ib = gfx_v12_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl, + .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec, + .preempt_ib = gfx_v12_0_ring_preempt_ib, + .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl, + .emit_wreg = gfx_v12_0_ring_emit_wreg, + .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v12_0_ring_soft_recovery, + .emit_mem_sync = gfx_v12_0_emit_mem_sync, +}; + +static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v12_0_ring_get_rptr_compute, + .get_wptr = gfx_v12_0_ring_get_wptr_compute, + .set_wptr = gfx_v12_0_ring_set_wptr_compute, + .emit_frame_size = + 7 + /* gfx_v12_0_ring_emit_hdp_flush */ + 5 + /* hdp invalidate */ + 7 + /* gfx_v12_0_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v12_0_ring_emit_vm_flush */ + 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */ + 8, /* gfx_v12_0_emit_mem_sync */ + .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */ + .emit_ib = gfx_v12_0_ring_emit_ib_compute, + .emit_fence = gfx_v12_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush, + .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, + .test_ring = gfx_v12_0_ring_test_ring, + .test_ib = gfx_v12_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v12_0_ring_emit_wreg, + .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, + .emit_mem_sync = gfx_v12_0_emit_mem_sync, +}; + +static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v12_0_ring_get_rptr_compute, + .get_wptr = gfx_v12_0_ring_get_wptr_compute, + .set_wptr = gfx_v12_0_ring_set_wptr_compute, + .emit_frame_size = + 7 + /* gfx_v12_0_ring_emit_hdp_flush */ + 5 + /*hdp invalidate */ + 7 + /* gfx_v12_0_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v12_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user 
fence, vm fence */ + .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */ + .emit_ib = gfx_v12_0_ring_emit_ib_compute, + .emit_fence = gfx_v12_0_ring_emit_fence_kiq, + .test_ring = gfx_v12_0_ring_test_ring, + .test_ib = gfx_v12_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v12_0_ring_emit_rreg, + .emit_wreg = gfx_v12_0_ring_emit_wreg, + .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, +}; + +static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute; +} + +static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = { + .set = gfx_v12_0_set_eop_interrupt_state, + .process = gfx_v12_0_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = { + .set = gfx_v12_0_set_priv_reg_fault_state, + .process = gfx_v12_0_priv_reg_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = { + .set = gfx_v12_0_set_priv_inst_fault_state, + .process = gfx_v12_0_priv_inst_irq, +}; + +static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs; +} + +static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + adev->gfx.imu.mode = MISSION_MODE; + else + adev->gfx.imu.mode = DEBUG_MODE; + + adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs; +} + +static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev) +{ + adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs; +} + +static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev) +{ + /* set gfx eng mqd */ + adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = + sizeof(struct v12_gfx_mqd); + adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = + gfx_v12_0_gfx_mqd_init; + /* set compute eng mqd */ + adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = + sizeof(struct v12_compute_mqd); + adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = + gfx_v12_0_compute_mqd_init; +} + +static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, + u32 bitmap) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + + WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); +} + +static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) +{ + u32 data, wgp_bitmask; + data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + + wgp_bitmask = + amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + + return (~data) & wgp_bitmask; +} + +static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) +{ + u32 wgp_idx, wgp_active_bitmap; + u32 cu_bitmap_per_wgp, cu_active_bitmap; + + wgp_active_bitmap = 
gfx_v12_0_get_wgp_active_bitmap_per_sh(adev); + cu_active_bitmap = 0; + + for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { + /* if a WGP is enabled, both of its CUs are enabled */ + cu_bitmap_per_wgp = 3 << (2 * wgp_idx); + if (wgp_active_bitmap & (1 << wgp_idx)) + cu_active_bitmap |= cu_bitmap_per_wgp; + } + + return cu_active_bitmap; +} + +static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap; + unsigned disable_masks[8 * 2]; + + if (!adev || !cu_info) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + bitmap = i * adev->gfx.config.max_sh_per_se + j; + if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) + continue; + mask = 1; + counter = 0; + gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0); + if (i < 8 && j < 2) + gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh( + adev, disable_masks[i * 2 + j]); + bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev); + + /* + * GFX12 can support more than 4 SEs, while the bitmap + * in the cu_info struct is 4x4 and the ioctl interface struct + * drm_amdgpu_info_device must stay stable. + * So the last two columns of the bitmap are used to store the + * cu mask for SEs 4 to 7; the layout of the bitmap is as below: + * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} + * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} + * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} + * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} + * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} + * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} + * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} + * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} + */ + cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) + counter++; + + mask <<= 1; + } + active_cu_number += counter; + } + } + gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v12_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 12, + .minor = 0, + .rev = 0, + .funcs = &gfx_v12_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h index 9afd6ddb01e9..bcc9c72ccbde 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h @@ -1,5 +1,5 @@ /* - * Copyright 2019 Advanced Micro Devices, Inc. + * Copyright 2023 Advanced Micro Devices, Inc. 
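gfx_v12_0_get_cu_active_bitmap_per_sh() expands each active WGP bit into a pair of CU bits (3 << (2 * wgp_idx)), and gfx_v12_0_get_cu_info() then counts the set bits per shader array. Extracted into a runnable form (__builtin_popcount stands in for the driver's counting loop):

```c
#include <stdint.h>
#include <stdio.h>

/* One WGP hosts two CUs: bit n of the WGP mask becomes bits 2n and 2n+1. */
static uint32_t wgp_to_cu_bitmap(uint32_t wgp_active_bitmap)
{
	uint32_t cu_bitmap = 0;
	unsigned int wgp_idx;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++)
		if (wgp_active_bitmap & (1u << wgp_idx))
			cu_bitmap |= 3u << (2 * wgp_idx);
	return cu_bitmap;
}

int main(void)
{
	uint32_t cu = wgp_to_cu_bitmap(0x0b);	/* WGPs 0, 1 and 3 active */

	printf("cu_bitmap=0x%08x, %d active CUs\n", cu, __builtin_popcount(cu));
	return 0;
}
```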
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,9 +21,9 @@ * */ -#ifndef __MES_V10_1_H__ -#define __MES_V10_1_H__ +#ifndef __GFX_V12_0_H__ +#define __GFX_V12_0_H__ -extern const struct amdgpu_ip_block_version mes_v10_1_ip_block; +extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 34f9211b2679..564f0b9336b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -311,7 +311,6 @@ static const u32 verde_rlc_save_restore_register_list[] = static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err; const struct gfx_firmware_header_v1_0 *cp_hdr; const struct rlc_firmware_header_v1_0 *rlc_hdr; @@ -337,32 +336,32 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me.bin", chip_name); if (err) goto out; cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce.bin", chip_name); if (err) goto out; cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; @@ -371,7 +370,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) out: if (err) { - pr_err("gfx6: Failed to load firmware \"%s\"\n", fw_name); + pr_err("gfx6: Failed to load firmware %s gfx firmware\n", chip_name); amdgpu_ucode_release(&adev->gfx.pfp_fw); amdgpu_ucode_release(&adev->gfx.me_fw); amdgpu_ucode_release(&adev->gfx.ce_fw); @@ -3457,6 +3456,8 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = { .soft_reset = gfx_v6_0_soft_reset, .set_clockgating_state = gfx_v6_0_set_clockgating_state, .set_powergating_state = gfx_v6_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 86a4865b1ae5..d84589137df9 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -909,7 +909,6 @@ static void gfx_v7_0_free_microcode(struct amdgpu_device *adev) static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err; DRM_DEBUG("\n"); @@ -934,40 +933,38 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me.bin", chip_name); if (err) goto out; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce.bin", chip_name); if (err) goto out; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", chip_name); if (err) goto out; if (adev->asic_type == CHIP_KAVERI) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_mec2.bin", chip_name); if (err) goto out; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); - if (err) - goto out; + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", chip_name); out: if (err) { - pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name); + pr_err("gfx7: Failed to load firmware %s gfx firmware\n", chip_name); gfx_v7_0_free_microcode(adev); } return err; @@ -2757,44 +2754,6 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) return 0; } -struct hqd_registers { - u32 cp_mqd_base_addr; - u32 cp_mqd_base_addr_hi; - u32 cp_hqd_active; - u32 cp_hqd_vmid; - u32 cp_hqd_persistent_state; - u32 cp_hqd_pipe_priority; - u32 cp_hqd_queue_priority; - u32 cp_hqd_quantum; - u32 cp_hqd_pq_base; - u32 cp_hqd_pq_base_hi; - u32 cp_hqd_pq_rptr; - u32 cp_hqd_pq_rptr_report_addr; - u32 cp_hqd_pq_rptr_report_addr_hi; - u32 cp_hqd_pq_wptr_poll_addr; - u32 cp_hqd_pq_wptr_poll_addr_hi; - u32 cp_hqd_pq_doorbell_control; - u32 cp_hqd_pq_wptr; - u32 cp_hqd_pq_control; - u32 cp_hqd_ib_base_addr; - u32 cp_hqd_ib_base_addr_hi; - u32 cp_hqd_ib_rptr; - u32 cp_hqd_ib_control; - u32 cp_hqd_iq_timer; - u32 cp_hqd_iq_rptr; - u32 cp_hqd_dequeue_request; - u32 cp_hqd_dma_offload; - u32 cp_hqd_sema_cmd; - u32 cp_hqd_msg_type; - u32 cp_hqd_atomic0_preop_lo; - u32 cp_hqd_atomic0_preop_hi; - u32 cp_hqd_atomic1_preop_lo; - u32 cp_hqd_atomic1_preop_hi; - u32 cp_hqd_hq_scheduler0; - u32 cp_hqd_hq_scheduler1; - u32 cp_mqd_control; -}; - static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int mec, int pipe) { @@ -4977,6 +4936,8 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .soft_reset = gfx_v7_0_soft_reset, .set_clockgating_state = gfx_v7_0_set_clockgating_state, .set_powergating_state = gfx_v7_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs 
gfx_v7_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 202ddda57f98..b4658c7db0e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -939,7 +939,6 @@ static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -982,15 +981,15 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) } if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp_2.bin", chip_name); if (err == -ENODEV) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp.bin", chip_name); } } else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp.bin", chip_name); } if (err) goto out; @@ -999,15 +998,15 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me_2.bin", chip_name); if (err == -ENODEV) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me.bin", chip_name); } } else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me.bin", chip_name); } if (err) goto out; @@ -1017,15 +1016,15 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce_2.bin", chip_name); if (err == -ENODEV) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce.bin", chip_name); } } else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce.bin", chip_name); } if (err) goto out; @@ -1044,8 +1043,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) } else adev->virt.chained_ib_support = false; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = 
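The Polaris branches in gfx_v8_0_init_microcode() first request the "_2.bin" variant of each ucode and fall back to the plain name only on -ENODEV, i.e. only when the file is absent, so real I/O errors still abort the load. A sketch of that fallback with a stand-in for amdgpu_ucode_request():

```c
#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for amdgpu_ucode_request(): only the "_2" variant exists here. */
static int request_fw(const char *name)
{
	return strstr(name, "_2.bin") ? 0 : -ENODEV;
}

static int load_mec_with_fallback(const char *chip_name)
{
	char name[64];
	int err;

	snprintf(name, sizeof(name), "amdgpu/%s_mec_2.bin", chip_name);
	err = request_fw(name);
	if (err == -ENODEV) {
		/* fall back only when the file is missing, not on other errors */
		snprintf(name, sizeof(name), "amdgpu/%s_mec.bin", chip_name);
		err = request_fw(name);
	}
	if (!err)
		printf("loaded %s\n", name);
	return err;
}

int main(void)
{
	return load_mec_with_fallback("polaris10");
}
```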
amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; @@ -1093,15 +1092,15 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec_2.bin", chip_name); if (err == -ENODEV) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", chip_name); } } else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", chip_name); } if (err) goto out; @@ -1112,15 +1111,15 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if ((adev->asic_type != CHIP_STONEY) && (adev->asic_type != CHIP_TOPAZ)) { if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_mec2_2.bin", chip_name); if (err == -ENODEV) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_mec2.bin", chip_name); } } else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_mec2.bin", chip_name); } if (!err) { cp_hdr = (const struct gfx_firmware_header_v1_0 *) @@ -1194,9 +1193,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) out: if (err) { - dev_err(adev->dev, - "gfx8: Failed to load firmware \"%s\"\n", - fw_name); + dev_err(adev->dev, "gfx8: Failed to load %s gfx firmware\n", chip_name); amdgpu_ucode_release(&adev->gfx.pfp_fw); amdgpu_ucode_release(&adev->gfx.me_fw); amdgpu_ucode_release(&adev->gfx.ce_fw); @@ -6878,6 +6875,8 @@ static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, .get_clockgating_state = gfx_v8_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6f97a6d0e6d0..2929c8972ea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -149,6 +149,135 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 +static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), 
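The recurring conversion in the gfx_v7/gfx_v8 hunks above (and the gfx_v9 ones further down) drops the per-function snprintf into a 30-byte stack fw_name buffer in favour of a printf-style amdgpu_ucode_request(), which removes the truncation-prone fixed buffer and lets the error paths report the chip name directly. A minimal sketch of a variadic request helper in that style follows; the helper name and its internals are illustrative assumptions, only the calling convention mirrors the diff.

/*
 * Illustrative sketch only: a printf-style firmware request helper in
 * the shape this series converts to. The real amdgpu_ucode_request()
 * in amdgpu_ucode.c also validates the ucode header; the name
 * example_ucode_request_fmt() is hypothetical.
 */
#include <linux/device.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
#include <linux/stdarg.h>

static int example_ucode_request_fmt(struct device *dev,
				     const struct firmware **fw,
				     const char *fmt, ...)
{
	char fw_name[64];	/* one buffer here instead of one per caller */
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = vsnprintf(fw_name, sizeof(fw_name), fmt, ap);
	va_end(ap);
	if (n < 0 || n >= (int)sizeof(fw_name))
		return -EINVAL;	/* built name would have been truncated */

	/* request_firmware() resolves the name under /lib/firmware */
	return request_firmware(fw, fw_name, dev);
}

With that shape a call site collapses to err = example_ucode_request_fmt(dev, &fw, "amdgpu/%s_pfp.bin", chip_name); and the -ENODEV fallback chains used above for the Polaris _2.bin variants keep working unchanged.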
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { + /* compute queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), +}; + enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -1249,23 +1378,22 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; int err; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, 
fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); @@ -1282,7 +1410,6 @@ out: static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -1300,20 +1427,22 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, if (!strcmp(chip_name, "picasso") && (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc_am4.bin", chip_name); else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && (smu_version >= 0x41e2b)) /** *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. */ - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_kicker_rlc.bin", chip_name); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; - rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); @@ -1337,28 +1466,27 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; int err; if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_sjt_mec.bin", chip_name); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", chip_name); if (err) goto out; + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_sjt_mec2.bin", chip_name); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - - /* ignore failures to load */ - err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_mec2.bin", chip_name); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); @@ -1994,6 +2122,34 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = 
ARRAY_SIZE(gc_reg_list_9); + uint32_t *ptr; + uint32_t inst; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_9); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } +} + static int gfx_v9_0_sw_init(void *handle) { int i, j, k, r, ring_id; @@ -2171,6 +2327,8 @@ static int gfx_v9_0_sw_init(void *handle) return -EINVAL; } + gfx_v9_0_alloc_ip_dump(adev); + return 0; } @@ -2206,6 +2364,9 @@ static int gfx_v9_0_sw_fini(void *handle) } gfx_v9_0_free_microcode(adev); + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); + return 0; } @@ -6840,6 +7001,88 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static void gfx_v9_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); + + if (!adev->gfx.ip_dump_core) + return; + + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_9[i].reg_name, + adev->gfx.ip_dump_core[i]); + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_9); + drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_9[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); + } + index += reg_count; + } + } + } + +} + +static void gfx_v9_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k, reg, index = 0; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); + + if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i])); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_9); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc15_grbm_select(adev, 1 + i, j, k, 0, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_9[reg])); + } + index += reg_count; + } + } + } + soc15_grbm_select(adev, 0, 0, 0, 0, 
0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); + +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -6856,6 +7099,8 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .set_clockgating_state = gfx_v9_0_set_clockgating_state, .set_powergating_state = gfx_v9_0_set_powergating_state, .get_clockgating_state = gfx_v9_0_get_clockgating_state, + .dump_ip_state = gfx_v9_ip_dump, + .print_ip_state = gfx_v9_ip_print, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { @@ -6981,7 +7226,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ @@ -7019,7 +7263,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 065b2bd5f5a6..3f4fd2f08163 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1909,18 +1909,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } -static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, @@ -1934,5 +1923,4 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = { .hw_ops = &gfx_v9_4_2_ras_ops, }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, - .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b53c8fd4e8cf..20ea6cb01edf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -41,7 +41,9 @@ #include "amdgpu_aca.h" MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -53,6 +55,14 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); #define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ #define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ +#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */ +#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */ +#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ +#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */ + +#define NORMALIZE_XCC_REG_OFFSET(offset) \ + (offset & 0xFFFF) 
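On these multi-XCC parts each accelerator core's gfxdec register file repeats at the same low 16 bits of the offset, which is what the NORMALIZE_XCC_REG_OFFSET() macro above strips down to; the gfx_v9_4_3_normalize_xcc_reg_offset() helper added below then keeps the masked value only when it falls inside one of the two gfxdec windows. A self-contained sketch of that decision, with hypothetical example offsets (the window bounds are copied from the #defines above):

/* Standalone model of the normalization rule; the example offsets
 * below are hypothetical, the window bounds match the #defines above. */
#include <stdint.h>
#include <stdio.h>

#define XCC_REG_RANGE_0_LOW	0x2000	/* XCC gfxdec0 lower bound */
#define XCC_REG_RANGE_0_HIGH	0x3400	/* XCC gfxdec0 upper bound */
#define XCC_REG_RANGE_1_LOW	0xA000	/* XCC gfxdec1 lower bound */
#define XCC_REG_RANGE_1_HIGH	0x10000	/* XCC gfxdec1 upper bound */

static uint32_t normalize_xcc_reg_offset(uint32_t reg)
{
	uint32_t norm = reg & 0xFFFF;	/* NORMALIZE_XCC_REG_OFFSET() */

	if ((norm >= XCC_REG_RANGE_0_LOW && norm < XCC_REG_RANGE_0_HIGH) ||
	    (norm >= XCC_REG_RANGE_1_LOW && norm < XCC_REG_RANGE_1_HIGH))
		return norm;	/* gfxdec register: use the local offset */
	return reg;		/* everything else is left untouched */
}

int main(void)
{
	/* 0x12345 masks to 0x2345, inside gfxdec0, so it normalizes */
	printf("0x12345 -> 0x%x\n", normalize_xcc_reg_offset(0x12345));
	/* 0x15000 masks to 0x5000, outside both windows, kept as-is */
	printf("0x15000 -> 0x%x\n", normalize_xcc_reg_offset(0x15000));
	return 0;
}

This is why gfx_v9_4_3_write_data_to_reg(), the WAIT_REG_MEM helper (register space only) and the ring rreg/wreg emitters later in this file all funnel their offsets through the same helper before building the packet.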
+ struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -215,9 +225,24 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) } } +static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg) +{ + uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg); + + /* If it is an XCC reg, normalize the reg to keep + lower 16 bits in local xcc */ + + if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) || + ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH))) + return normalized_reg; + else + return reg; +} + static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, bool wc, uint32_t reg, uint32_t val) { + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | WRITE_DATA_DST_SEL(0) | @@ -232,6 +257,12 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, uint32_t addr1, uint32_t ref, uint32_t mask, uint32_t inv) { + /* Only do the normalization on regspace */ + if (mem_space == 0) { + addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0); + addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, /* memory (1) or register (0) */ @@ -370,15 +401,14 @@ static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev) static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev, const char *chip_name) { - char fw_name[30]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; uint16_t version_minor; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; @@ -407,12 +437,10 @@ static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev) static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, const char *chip_name) { - char fw_name[30]; int err; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); @@ -431,16 +459,16 @@ out: static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) { - const char *chip_name; + char ucode_prefix[15]; int r; - chip_name = "gc_9_4_3"; + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name); + r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix); if (r) return r; - r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name); + r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix); if (r) return r; @@ -624,6 +652,15 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); } +static int gfx_v9_4_3_get_xccs_per_xcp(struct amdgpu_device *adev) +{ + u32 xcp_ctl; + + /* Value is expected to be the same on all, fetch from first instance */ + xcp_ctl = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HYP_XCP_CTL); + + return REG_GET_FIELD(xcp_ctl, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP); 
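/*
 * A note on the field extraction above, sketched under the usual
 * generated-header naming convention (the exact mask/shift macro names
 * are an assumption): REG_GET_FIELD(xcp_ctl, CP_HYP_XCP_CTL,
 * NUM_XCC_IN_XCP) expands to a plain mask-and-shift, roughly
 *
 *	(xcp_ctl & CP_HYP_XCP_CTL__NUM_XCC_IN_XCP_MASK) >>
 *		CP_HYP_XCP_CTL__NUM_XCC_IN_XCP__SHIFT
 *
 * so under SR-IOV the guest merely reads back how many XCCs the host
 * grouped into each partition instead of switching the partition mode
 * itself, which is exactly how gfx_v9_4_3_cp_resume() uses it below.
 */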
+} static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) @@ -678,40 +715,47 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, + .get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp, }; -static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_error_type type, - struct aca_bank_report *report, void *data) +static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, + struct aca_bank *bank, enum aca_smu_type type, + void *data) { - u64 status, misc0; + struct aca_bank_info info; + u64 misc0; u32 instlo; int ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - if ((type == ACA_ERROR_TYPE_UE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || - (type == ACA_ERROR_TYPE_CE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; - ret = aca_bank_info_decode(bank, &report->info); - if (ret) - return ret; + /* NOTE: overwrite info.die_id with xcd id for gfx */ + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; - /* NOTE: overwrite info.die_id with xcd id for gfx */ - instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); - instlo &= GENMASK(31, 1); - report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; + misc0 = bank->regs[ACA_REG_IDX_MISC0]; - misc0 = bank->regs[ACA_REG_IDX_MISC0]; - report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_UE, 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; } - return 0; + return ret; } static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { u32 instlo; @@ -730,7 +774,7 @@ static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { - .aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report, + .aca_bank_parser = gfx_v9_4_3_aca_bank_parser, .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, }; @@ -749,6 +793,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1386,21 +1431,23 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { - u32 reg, data; + u32 reg, pre_data, data; reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL); - if (amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) + pre_data = RREG32_NO_KIQ(reg); else - data = RREG32(reg); + pre_data = RREG32(reg); - data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data = pre_data & 
(~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); + if (pre_data != data) { + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { + WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); + } else + WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); + } } static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = { @@ -1576,6 +1623,9 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id) DOORBELL_SOURCE, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); + if (amdgpu_sriov_vf(adev)) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_MODE, 1); } else { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); @@ -2010,18 +2060,31 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { - int r = 0, i, num_xcc; + int r = 0, i, num_xcc, num_xcp, num_xcc_per_xcp; - if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, - AMDGPU_XCP_FL_NONE) == - AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) - r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, - amdgpu_user_partt_mode); + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + if (amdgpu_sriov_vf(adev)) { + enum amdgpu_gfx_partition mode; + + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE); + if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + return -EINVAL; + num_xcc_per_xcp = gfx_v9_4_3_get_xccs_per_xcp(adev); + adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp; + num_xcp = num_xcc / num_xcc_per_xcp; + r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode); + } else { + if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE) == + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + r = amdgpu_xcp_switch_partition_mode( + adev->xcp_mgr, amdgpu_user_partt_mode); + } if (r) return r; - num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { r = gfx_v9_4_3_xcc_cp_resume(adev, i); if (r) @@ -2398,10 +2461,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); - /* enable cgcg FSM(0x0000363F) */ + /* CGCG Hysteresis: 400us */ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); - data = (0x36 + data = (0x2710 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) @@ -2410,10 +2473,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); - /* set IDLE_POLL_COUNT(0x00900100) */ + /* set IDLE_POLL_COUNT(0x33450100)*/ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL); data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | - (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + (0x3345 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data); } else { @@ -2496,6 +2559,7 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle, num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): + case 
IP_VERSION(9, 4, 4): for (i = 0; i < num_xcc; i++) gfx_v9_4_3_xcc_update_gfx_clock_gating( adev, state == AMD_CG_STATE_GATE, i); break; @@ -2716,6 +2780,8 @@ static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, { struct amdgpu_device *adev = ring->adev; + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register */ (5 << 8) | /* dst: memory */ @@ -2733,6 +2799,8 @@ static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, { uint32_t cmd = 0; + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); + switch (ring->funcs->type) { case AMDGPU_RING_TYPE_GFX: cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; @@ -4010,6 +4078,8 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { @@ -4130,6 +4200,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) /* init asic gds info */ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): /* 9.4.3 removed all the GDS internal memory, * only support GWS opcode in kernel, like barrier * semaphore, etc. */ adev->gds.gds_size = 0; break; @@ -4142,6 +4213,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): /* deprecated for 9.4.3, no usage at all */ adev->gds.gds_compute_max_wave_id = 0; break; @@ -4187,9 +4259,10 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_i static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info) { - int i, j, k, counter, xcc_id, active_cu_number = 0; - u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp; unsigned disable_masks[4 * 4]; + bool is_symmetric_cus; if (!adev || !cu_info) return -EINVAL; @@ -4207,6 +4280,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + is_symmetric_cus = true; for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { mask = 1; @@ -4234,6 +4308,15 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } + if (i && is_symmetric_cus && prev_counter != counter) + is_symmetric_cus = false; + prev_counter = counter; + } + if (is_symmetric_cus) { + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG); + tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp); } gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c new file mode 100644 index 000000000000..7609b9cecae8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c @@ -0,0 +1,521 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "gfxhub_v12_0.h" + +#include "gc/gc_12_0_0_offset.h" +#include "gc/gc_12_0_0_sh_mask.h" +#include "soc24_enum.h" +#include "soc15_common.h" + +#define regGCVM_L2_CNTL3_DEFAULT 0x80120007 +#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0 +#define regGRBM_GFX_INDEX_DEFAULT 0xe0000000 + +static const char *gfxhub_client_ids[] = { + "CB", + "DB", + "GE1", + "GE2", + "CPF", + "CPC", + "CPG", + "RLC", + "TCP", + "SQC (inst)", + "SQC (data)", + "SQG/PC/SC", + "Reserved", + "SDMA0", + "SDMA1", + "GCR", + "Reserved", + "Reserved", + "WGS", + "DSM", + "PA" +}; + +static uint32_t gfxhub_v12_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +gfxhub_v12_0_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + u32 cid = REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, CID); + + dev_err(adev->dev, + "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + cid >= ARRAY_SIZE(gfxhub_client_ids) ? 
"unknown" : gfxhub_client_ids[cid], + cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS_LO32, RW)); +} + +static u64 gfxhub_v12_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE); + + base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +static u64 gfxhub_v12_0_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24; +} + +static void gfxhub_v12_0_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); +} + +static void gfxhub_v12_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v12_0_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void gfxhub_v12_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + + /* Program the AGP BAR */ + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + + +static void gfxhub_v12_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v12_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp); + + tmp = regGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp); + + tmp = regGCVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp); + + tmp = regGCVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp); +} + +static void gfxhub_v12_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp); +} + +static void gfxhub_v12_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + /* These registers 
are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + +} + +static void gfxhub_v12_0_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; +} + +static void gfxhub_v12_0_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int gfxhub_v12_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF because they are + * VF copy registers, so the vbios post doesn't program them; + * for SRIOV the driver needs to program them + */ + WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. 
*/ + gfxhub_v12_0_init_gart_aperture_regs(adev); + gfxhub_v12_0_init_system_aperture_regs(adev); + gfxhub_v12_0_init_tlb_regs(adev); + gfxhub_v12_0_init_cache_regs(adev); + + gfxhub_v12_0_enable_system_domain(adev); + gfxhub_v12_0_disable_identity_aperture(adev); + gfxhub_v12_0_setup_vmid_config(adev); + gfxhub_v12_0_program_invalidation(adev); + + return 0; +} + +static void gfxhub_v12_0_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0); +} + +/** + * gfxhub_v12_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void gfxhub_v12_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + /* NO halt CP when page fault */ + tmp = RREG32_SOC15(GC, 0, regCP_DEBUG); + tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1); + WREG32_SOC15(GC, 0, regCP_DEBUG, tmp); + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs gfxhub_v12_0_vmhub_funcs = { + .print_l2_protection_fault_status = gfxhub_v12_0_print_l2_protection_fault_status, + .get_invalidate_req = gfxhub_v12_0_get_invalidate_req, +}; + +static void gfxhub_v12_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = 
&adev->vmhub[AMDGPU_GFXHUB(0)]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ - + regGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &gfxhub_v12_0_vmhub_funcs; +} + +const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs = { + .get_fb_location = gfxhub_v12_0_get_fb_location, + .get_mc_fb_offset = gfxhub_v12_0_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v12_0_setup_vm_pt_regs, + .gart_enable = gfxhub_v12_0_gart_enable, + .gart_disable = gfxhub_v12_0_gart_disable, + .set_fault_enable_default = gfxhub_v12_0_set_fault_enable_default, + .init = gfxhub_v12_0_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h new file mode 100644 index 000000000000..f1258265f802 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __GFXHUB_V12_0_H__ +#define __GFXHUB_V12_0_H__ + +extern const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 22175da0e16a..d200310d1731 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,6 +443,22 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } +static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) + return false; + + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, @@ -452,4 +468,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 49aecdcee006..72109abe7c86 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -366,7 +366,9 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 3)); + IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 4, 4)); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), @@ -620,6 +622,22 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } +static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 fed, status; + + status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + if (!amdgpu_sriov_vf(adev)) { + /* clear page fault status and address */ + WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); + } + + return fed; +} + const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -628,6 +646,7 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d933e19e0cf5..f0ceab3ce5bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -473,17 +473,17 @@ static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) { switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); 
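/*
 * The PTE mtype macros grew a flags argument in this series: callers
 * such as the get_vm_pte() functions below no longer clear
 * AMDGPU_PTE_MTYPE_NV10_MASK by hand before OR-ing in the new type.
 * Sketched from the call sites (the exact definition lives in the
 * amdgpu GMC headers and the bit position is an assumption), the
 * two-argument form behaves like
 *
 *	#define AMDGPU_PTE_MTYPE_NV10(flags, mtype)			\
 *		(((uint64_t)(flags) & ~AMDGPU_PTE_MTYPE_NV10_MASK) |	\
 *		 ((uint64_t)(mtype) << 48))
 *
 * which is why the map_mtype() switch cases here pass 0ULL as the
 * base flags.
 */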
case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC); case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC); default: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); } } @@ -536,8 +536,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED)) - *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | - AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC); } static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) @@ -763,7 +762,7 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; - adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) | + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; return amdgpu_gart_table_vram_alloc(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 527dc917e049..b88a6fa173b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -438,17 +438,17 @@ static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) { switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC); case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC); default: - return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); } } @@ -501,8 +501,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED)) - *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | - AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC); } static unsigned int gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev) @@ -593,6 +592,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) break; case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; break; default: @@ -723,7 +723,7 @@ static int gmc_v11_0_gart_init(struct amdgpu_device *adev) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; - adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) | + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; return amdgpu_gart_table_vram_alloc(adev); @@ -755,6 +755,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): set_bit(AMDGPU_GFXHUB(0), 
adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c new file mode 100644 index 000000000000..fd3ac483760e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -0,0 +1,1022 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include <linux/pci.h> + +#include <drm/drm_cache.h> + +#include "amdgpu.h" +#include "amdgpu_atomfirmware.h" +#include "gmc_v12_0.h" +#include "athub/athub_4_1_0_sh_mask.h" +#include "athub/athub_4_1_0_offset.h" +#include "oss/osssys_7_0_0_offset.h" +#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" +#include "soc24_enum.h" +#include "soc24.h" +#include "soc15d.h" +#include "soc15_common.h" +#include "nbif_v6_3_1.h" +#include "gfxhub_v12_0.h" +#include "mmhub_v4_1_0.h" +#include "athub_v4_1_0.h" + + +static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int gmc_v12_0_vm_fault_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + /* MM HUB */ + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false); + /* GFX HUB */ + /* This works because this interrupt is only + * enabled at init/resume and disabled in + * fini/suspend, so the overall state doesn't + * change over the course of suspend/resume. + */ + if (!adev->in_s0ix) + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); + break; + case AMDGPU_IRQ_STATE_ENABLE: + /* MM HUB */ + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true); + /* GFX HUB */ + /* This works because this interrupt is only + * enabled at init/resume and disabled in + * fini/suspend, so the overall state doesn't + * change over the course of suspend/resume. 
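+ * In S0ix the GFX HUB masks are deliberately left untouched; hence the + * !adev->in_s0ix checks around the GFX HUB calls here.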
+ */ + if (!adev->in_s0ix) + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true); + break; + default: + break; + } + + return 0; +} + +static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_vmhub *hub; + uint32_t status = 0; + u64 addr; + + addr = (u64)entry->src_data[0] << 12; + addr |= ((u64)entry->src_data[1] & 0xf) << 44; + + if (entry->client_id == SOC21_IH_CLIENTID_VMC) + hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + else + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + + if (!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB(0)) + RREG32(hub->vm_l2_pro_fault_status); + + status = RREG32(hub->vm_l2_pro_fault_status); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, + entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0)); + } + + if (printk_ratelimit()) { + struct amdgpu_task_info *task_info; + + dev_err(adev->dev, + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", + entry->vmid_src ? "mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", + addr, entry->client_id); + + if (!amdgpu_sriov_vf(adev)) + hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs gmc_v12_0_irq_funcs = { + .set = gmc_v12_0_vm_fault_interrupt_state, + .process = gmc_v12_0_process_interrupt, +}; + +static const struct amdgpu_irq_src_funcs gmc_v12_0_ecc_funcs = { + .set = gmc_v12_0_ecc_interrupt_state, + .process = amdgpu_umc_process_ecc_irq, +}; + +static void gmc_v12_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v12_0_irq_funcs; + + if (!amdgpu_sriov_vf(adev)) { + adev->gmc.ecc_irq.num_types = 1; + adev->gmc.ecc_irq.funcs = &gmc_v12_0_ecc_funcs; + } +} + +/** + * gmc_v12_0_use_invalidate_semaphore - decide whether to use the invalidate semaphore + * + * @adev: amdgpu_device pointer + * @vmhub: vmhub type + * + */ +static bool gmc_v12_0_use_invalidate_semaphore(struct amdgpu_device *adev, + uint32_t vmhub) +{ + return ((vmhub == AMDGPU_MMHUB0(0)) && + (!amdgpu_sriov_vf(adev))); +} + +static bool gmc_v12_0_get_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff; + + return !!(*p_pasid); +} + +/* + * GART + * VMID 0 holds the physical GPU address space as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code.
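+ * On GMC v12 the userspace split is VMIDs 1-7 for amdgpu graphics/compute + * and VMIDs 8-15 for amdkfd (see first_kfd_vmid in gmc_v12_0_sw_init).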
+ */ + +static void gmc_v12_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, + unsigned int vmhub, uint32_t flush_type) +{ + bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(adev, vmhub); + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); + u32 tmp; + /* Use register 17 for GART */ + const unsigned eng = 17; + unsigned int i; + unsigned char hub_ip = 0; + + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? + GC_HWIP : MMHUB_HWIP; + + spin_lock(&adev->gmc.invalidate_lock); + /* + * The GPUVM invalidate acknowledge state may be lost across a + * power-gating off cycle. Acquire the semaphore before the invalidation + * and release it afterwards so the hub cannot enter the power-gated + * state in between, as a workaround for the issue. + */ + + /* TODO: Semaphore usage still needs to be debugged for the GFXHUB as well. */ + if (use_semaphore) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acquire */ + tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, hub_ip); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + dev_err(adev->dev, + "Timeout waiting for sem acquire in VM flush!\n"); + } + + WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip); + + /* Wait for ACK with a delay. */ + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng, hub_ip); + tmp &= 1 << vmid; + if (tmp) + break; + + udelay(1); + } + + /* TODO: Semaphore usage still needs to be debugged for the GFXHUB as well. */ + if (use_semaphore) + /* + * Release the semaphore after the invalidation; + * writing 0 releases it. + */ + WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0, hub_ip); + + /* Issue an additional private vm invalidation to the MMHUB */ + if ((vmhub != AMDGPU_GFXHUB(0)) && + (hub->vm_l2_bank_select_reserved_cid2) && + !amdgpu_sriov_vf(adev)) { + inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); + /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */ + inv_req |= (1 << 25); + /* Issue the private invalidation */ + WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req); + /* Read back to ensure the invalidation is done */ + RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); + } + + spin_unlock(&adev->gmc.invalidate_lock); + + if (i < adev->usec_timeout) + return; + + dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n"); +} + +/** + * gmc_v12_0_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * @vmhub: which hub to flush + * @flush_type: the flush type + * + * Flush the TLB for the requested page table.
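+ * The flush is requested through KIQ or MES firmware when one of those + * rings is ready (required for SRIOV, and for GFXOFF on bare metal); + * otherwise the invalidation engine registers are programmed directly.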
+ */ +static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) +{ + if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron) + return; + + /* flush hdp cache */ + adev->hdp.funcs->flush_hdp(adev, NULL); + + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal. + */ + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + const unsigned eng = 17; + u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); + u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; + + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); + return; + } + + mutex_lock(&adev->mman.gtt_window_lock); + gmc_v12_0_flush_vm_hub(adev, vmid, vmhub, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; +} + +/** + * gmc_v12_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flushed + * @flush_type: the flush type + * @all_hub: flush all hubs + * @inst: which KIQ instance to use for the invalidation + * + * Flush the TLB for the requested pasid. + */ +static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) +{ + uint16_t queried; + int vmid, i; + + for (vmid = 1; vmid < 16; vmid++) { + bool valid; + + valid = gmc_v12_0_get_vmid_pasid_mapping_info(adev, vmid, + &queried); + if (!valid || queried != pasid) + continue; + + if (all_hub) { + for_each_set_bit(i, adev->vmhubs_mask, + AMDGPU_MAX_VMHUBS) + gmc_v12_0_flush_gpu_tlb(adev, vmid, i, + flush_type); + } else { + gmc_v12_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), + flush_type); + } + } +} + +static uint64_t gmc_v12_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); + unsigned eng = ring->vm_inv_eng; + + /* + * The GPUVM invalidate acknowledge state may be lost across a + * power-gating off cycle. Acquire the semaphore before the invalidation + * and release it afterwards so the hub cannot enter the power-gated + * state in between, as a workaround for the issue. + */ + + /* TODO: Semaphore usage still needs to be debugged for the GFXHUB as well. */ + if (use_semaphore) + /* a read return value of 1 means semaphore acquire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, + req, 1 << vmid); + + /* TODO: Semaphore usage still needs to be debugged for the GFXHUB as well.
*/ + if (use_semaphore) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); + + return pd_addr; +} + +static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg; + + /* MES fw manages IH_VMID_x_LUT updating */ + if (ring->is_mes_queue) + return; + + if (ring->vm_hub == AMDGPU_GFXHUB(0)) + reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; + else + reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; + + amdgpu_ring_emit_wreg(ring, reg, pasid); +} + +/* + * PTE format: + * 63 P + * 62:59 reserved + * 58 D + * 57 G + * 56 T + * 55:54 M + * 53:52 SW + * 51:48 reserved for future + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid + * + * PDE format: + * 63 P + * 62:58 block fragment size + * 57 reserved + * 56 A + * 55:54 M + * 53:52 reserved + * 51:48 reserved for future + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid + */ + +static uint64_t gmc_v12_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) +{ + switch (flags) { + case AMDGPU_VM_MTYPE_DEFAULT: + return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); + case AMDGPU_VM_MTYPE_NC: + return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); + case AMDGPU_VM_MTYPE_UC: + return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC); + default: + return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); + } +} + +static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) +{ + if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM)) + *addr = adev->vm_manager.vram_base_offset + *addr - + adev->gmc.vram_start; + BUG_ON(*addr & 0xFFFF00000000003FULL); + + if (!adev->gmc.translate_further) + return; + + if (level == AMDGPU_VM_PDB1) { + /* Set the block fragment size */ + if (!(*flags & AMDGPU_PDE_PTE_GFX12)) + *flags |= AMDGPU_PDE_BFS_GFX12(0x9); + + } else if (level == AMDGPU_VM_PDB0) { + if (*flags & AMDGPU_PDE_PTE_GFX12) + *flags &= ~AMDGPU_PDE_PTE_GFX12; + } +} + +static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t *flags) +{ + struct amdgpu_bo *bo = mapping->bo_va->base.bo; + struct amdgpu_device *bo_adev; + bool coherent, is_system; + + + *flags &= ~AMDGPU_PTE_EXECUTABLE; + *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + *flags &= ~AMDGPU_PTE_MTYPE_GFX12_MASK; + *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_GFX12_MASK); + + if (mapping->flags & AMDGPU_PTE_PRT_GFX12) { + *flags |= AMDGPU_PTE_PRT_GFX12; + *flags |= AMDGPU_PTE_SNOOPED; + *flags |= AMDGPU_PTE_SYSTEM; + *flags |= AMDGPU_PTE_IS_PTE; + *flags &= ~AMDGPU_PTE_VALID; + } + + if (!bo) + return; + + if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_UNCACHED)) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); + + bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; + is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || + (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); + + if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + *flags |= AMDGPU_PTE_DCC; + + /* WA for HW bug */ + if (is_system || ((bo_adev != adev) && coherent)) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); + +} + +static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + 
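+ /* No VBIOS framebuffer allocation is reserved on GMC v12; report zero. */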
return 0; +} + +static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, + .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, + .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping, + .map_mtype = gmc_v12_0_map_mtype, + .get_vm_pde = gmc_v12_0_get_vm_pde, + .get_vm_pte = gmc_v12_0_get_vm_pte, + .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, +}; + +static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) +{ + adev->gmc.gmc_funcs = &gmc_v12_0_gmc_funcs; +} + +static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev) +{ +} + + +static void gmc_v12_0_set_mmhub_funcs(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + adev->mmhub.funcs = &mmhub_v4_1_0_funcs; + break; + default: + break; + } +} + +static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + adev->gfxhub.funcs = &gfxhub_v12_0_funcs; + break; + default: + break; + } +} + +static int gmc_v12_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v12_0_set_gfxhub_funcs(adev); + gmc_v12_0_set_mmhub_funcs(adev); + gmc_v12_0_set_gmc_funcs(adev); + gmc_v12_0_set_irq_funcs(adev); + gmc_v12_0_set_umc_funcs(adev); + + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; + adev->gmc.shared_aperture_end = + adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = 0x1000000000000000ULL; + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; + + return 0; +} + +static int gmc_v12_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_gmc_allocate_vm_inv_eng(adev); + if (r) + return r; + + r = amdgpu_gmc_ras_late_init(adev); + if (r) + return r; + + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); +} + +static void gmc_v12_0_vram_gtt_location(struct amdgpu_device *adev, + struct amdgpu_gmc *mc) +{ + u64 base = 0; + + base = adev->mmhub.funcs->get_fb_location(adev); + + amdgpu_gmc_set_agp_default(adev, mc); + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW); + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) + amdgpu_gmc_agp_location(adev, mc); + + /* base offset of vram pages */ + if (amdgpu_sriov_vf(adev)) + adev->vm_manager.vram_base_offset = 0; + else + adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev); +} + +/** + * gmc_v12_0_mc_init - initialize the memory controller driver params + * + * @adev: amdgpu_device pointer + * + * Look up the amount of vram, vram width, and decide how to place + * vram and gart within the GPU's physical address space. + * Returns 0 for success. 
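+ * The VRAM size comes from the NBIO memsize register; the GART size + * defaults to 512 MiB unless overridden with the amdgpu_gart_size module + * parameter.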
+ */ +static int gmc_v12_0_mc_init(struct amdgpu_device *adev) +{ + int r; + + /* the NBIO memsize register reports the VRAM size in MB */ + adev->gmc.mc_vram_size = + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; + + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_device_resize_fb_bar(adev); + if (r) + return r; + } + + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); + +#ifdef CONFIG_X86_64 + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { + adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev); + adev->gmc.aper_size = adev->gmc.real_vram_size; + } +#endif + /* In case the PCI BAR is larger than the actual amount of vram */ + adev->gmc.visible_vram_size = adev->gmc.aper_size; + if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) + adev->gmc.visible_vram_size = adev->gmc.real_vram_size; + + /* set the gart size */ + if (amdgpu_gart_size == -1) { + adev->gmc.gart_size = 512ULL << 20; + } else + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; + + gmc_v12_0_vram_gtt_location(adev, &adev->gmc); + + return 0; +} + +static int gmc_v12_0_gart_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->gart.bo) { + WARN(1, "PCIE GART already initialized\n"); + return 0; + } + + /* Initialize common gart structure */ + r = amdgpu_gart_init(adev); + if (r) + return r; + + adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC) | + AMDGPU_PTE_EXECUTABLE | + AMDGPU_PTE_IS_PTE; + + return amdgpu_gart_table_vram_alloc(adev); +} + +static int gmc_v12_0_sw_init(void *handle) +{ + int r, vram_width = 0, vram_type = 0, vram_vendor = 0; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mmhub.funcs->init(adev); + + adev->gfxhub.funcs->init(adev); + + spin_lock_init(&adev->gmc.invalidate_lock); + + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + adev->gmc.vram_width = vram_width; + + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); + /* + * To fulfill 4-level page table support, + * the vm size is the 256 TB (48 bit) maximum, + * with a block size of 512 (9 bit) + */ + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; + default: + break; + } + + /* This interrupt is the VMC page fault. */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC, + VMC_1_0__SRCID__VM_FAULT, + &adev->gmc.vm_fault); + + if (r) + return r; + + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, + UTCL2_1_0__SRCID__FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + + if (!amdgpu_sriov_vf(adev)) { + /* interrupt sent to DF. */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0, + &adev->gmc.ecc_irq); + if (r) + return r; + } + + /* + * Set the internal MC address mask. This is the max address of the GPU's + * internal address space.
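+ * With 48 address bits the mask below is (1ULL << 48) - 1 = + * 0xffffffffffff.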
+ */ + adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); + if (r) { + printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + return r; + } + + adev->need_swiotlb = drm_need_swiotlb(44); + + r = gmc_v12_0_mc_init(adev); + if (r) + return r; + + amdgpu_gmc_get_vbios_allocations(adev); + + /* Memory manager */ + r = amdgpu_bo_init(adev); + if (r) + return r; + + r = gmc_v12_0_gart_init(adev); + if (r) + return r; + + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.first_kfd_vmid = 8; + + amdgpu_vm_manager_init(adev); + + return 0; +} + +/** + * gmc_v12_0_gart_fini - vm fini callback + * + * @adev: amdgpu_device pointer + * + * Tears down the driver GART/VM setup. + */ +static void gmc_v12_0_gart_fini(struct amdgpu_device *adev) +{ + amdgpu_gart_table_vram_free(adev); +} + +static int gmc_v12_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_vm_manager_fini(adev); + gmc_v12_0_gart_fini(adev); + amdgpu_gem_force_release(adev); + amdgpu_bo_fini(adev); + + return 0; +} + +static void gmc_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ +} + +/** + * gmc_v12_0_gart_enable - gart enable + * + * @adev: amdgpu_device pointer + */ +static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) +{ + int r; + bool value; + + if (adev->gart.bo == NULL) { + dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + + amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); + + r = adev->mmhub.funcs->gart_enable(adev); + if (r) + return r; + + /* Flush HDP after it is initialized */ + adev->hdp.funcs->flush_hdp(adev, NULL); + + value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? + false : true; + + adev->mmhub.funcs->set_fault_enable_default(adev, value); + gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); + + dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(adev->gmc.gart_size >> 20), + (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); + + return 0; +} + +static int gmc_v12_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* The sequence of these two function calls matters. */ + gmc_v12_0_init_golden_registers(adev); + + r = gmc_v12_0_gart_enable(adev); + if (r) + return r; + + if (adev->umc.funcs && adev->umc.funcs->init_registers) + adev->umc.funcs->init_registers(adev); + + return 0; +} + +/** + * gmc_v12_0_gart_disable - gart disable + * + * @adev: amdgpu_device pointer + * + * This disables all VM page tables.
+ */ +static void gmc_v12_0_gart_disable(struct amdgpu_device *adev) +{ + adev->mmhub.funcs->gart_disable(adev); +} + +static int gmc_v12_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) { + /* full access mode, so don't touch any GMC register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + + gmc_v12_0_gart_disable(adev); + + return 0; +} + +static int gmc_v12_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v12_0_hw_fini(adev); + + return 0; +} + +static int gmc_v12_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = gmc_v12_0_hw_init(adev); + if (r) + return r; + + amdgpu_vmid_reset_all(adev); + + return 0; +} + +static bool gmc_v12_0_is_idle(void *handle) +{ + /* The MC is always ready in GMC v12. */ + return true; +} + +static int gmc_v12_0_wait_for_idle(void *handle) +{ + /* There is no need to wait for MC idle in GMC v12. */ + return 0; +} + +static int gmc_v12_0_soft_reset(void *handle) +{ + return 0; +} + +static int gmc_v12_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = adev->mmhub.funcs->set_clockgating(adev, state); + if (r) + return r; + + return athub_v4_1_0_set_clockgating(adev, state); +} + +static void gmc_v12_0_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mmhub.funcs->get_clockgating(adev, flags); + + athub_v4_1_0_get_clockgating(adev, flags); +} + +static int gmc_v12_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs gmc_v12_0_ip_funcs = { + .name = "gmc_v12_0", + .early_init = gmc_v12_0_early_init, + .sw_init = gmc_v12_0_sw_init, + .hw_init = gmc_v12_0_hw_init, + .late_init = gmc_v12_0_late_init, + .sw_fini = gmc_v12_0_sw_fini, + .hw_fini = gmc_v12_0_hw_fini, + .suspend = gmc_v12_0_suspend, + .resume = gmc_v12_0_resume, + .is_idle = gmc_v12_0_is_idle, + .wait_for_idle = gmc_v12_0_wait_for_idle, + .soft_reset = gmc_v12_0_soft_reset, + .set_clockgating_state = gmc_v12_0_set_clockgating_state, + .set_powergating_state = gmc_v12_0_set_powergating_state, + .get_clockgating_state = gmc_v12_0_get_clockgating_state, +}; + +const struct amdgpu_ip_block_version gmc_v12_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 12, + .minor = 0, + .rev = 0, + .funcs = &gmc_v12_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h new file mode 100644 index 000000000000..deca93e4a156 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GMC_V12_0_H__ +#define __GMC_V12_0_H__ + +extern const struct amd_ip_funcs gmc_v12_0_ip_funcs; +extern const struct amdgpu_ip_block_version gmc_v12_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 23b478639921..d36725666b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -98,9 +98,7 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev) static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err; - bool is_58_fw = false; DRM_DEBUG("\n"); @@ -126,17 +124,13 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) /* this memory configuration requires special firmware */ if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58) - is_58_fw = true; + chip_name = "si58"; - if (is_58_fw) - snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin"); - else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); if (err) { dev_err(adev->dev, - "si_mc: Failed to load firmware \"%s\"\n", - fw_name); + "si_mc: Failed to load firmware \"%s_mc.bin\"\n", + chip_name); amdgpu_ucode_release(&adev->gmc.fw); } return err; @@ -1115,6 +1109,8 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { .soft_reset = gmc_v6_0_soft_reset, .set_clockgating_state = gmc_v6_0_set_clockgating_state, .set_powergating_state = gmc_v6_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 3da7b6a2b00d..994432fb57ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -130,7 +130,6 @@ static void gmc_v7_0_mc_resume(struct amdgpu_device *adev) static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err; DRM_DEBUG("\n"); @@ -153,11 +152,9 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); - - err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, 
"amdgpu/%s_mc.bin", chip_name); if (err) { - pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name); + pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); } return err; @@ -1354,6 +1351,8 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { .soft_reset = gmc_v7_0_soft_reset, .set_clockgating_state = gmc_v7_0_set_clockgating_state, .set_powergating_state = gmc_v7_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d20e5f20ee31..86488c052f82 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -212,7 +212,6 @@ static void gmc_v8_0_mc_resume(struct amdgpu_device *adev) static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err; DRM_DEBUG("\n"); @@ -255,10 +254,9 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); if (err) { - pr_err("mc: Failed to load firmware \"%s\"\n", fw_name); + pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); } return err; @@ -1717,6 +1715,8 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { .set_clockgating_state = gmc_v8_0_set_clockgating_state, .set_powergating_state = gmc_v8_0_set_powergating_state, .get_clockgating_state = gmc_v8_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 47b63a4ce68b..b73136d390cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -548,7 +548,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, { bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - uint32_t status = 0, cid = 0, rw = 0; + uint32_t status = 0, cid = 0, rw = 0, fed = 0; struct amdgpu_task_info *task_info; struct amdgpu_vmhub *hub; const char *mmhub_cid; @@ -644,7 +644,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? 
".XCD1" : ""); @@ -664,7 +665,15 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID); rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + + /* for fed error, kfd will handle it, return directly */ + if (fed && amdgpu_ras_is_poison_mode_supported(adev) && + (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) + return 0; + + if (!amdgpu_sriov_vf(adev)) + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub); @@ -779,7 +788,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -835,7 +845,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; if (vmhub >= AMDGPU_MMHUB0(0)) - inst = GET_INST(GC, 0); + inst = 0; else inst = vmhub; @@ -867,9 +877,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst)); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst)); if (tmp & 0x1) break; udelay(1); @@ -880,9 +890,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst); + WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst)); else - WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst); + WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst)); /* * Issue a dummy read to wait for the ACK register to @@ -895,9 +905,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst)); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst)); if (tmp & (1 << vmid)) break; udelay(1); @@ -910,9 +920,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst); + WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst)); else - WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst); + WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst)); } spin_unlock(&adev->gmc.invalidate_lock); @@ -1066,19 +1076,19 @@ static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) { switch (flags) { case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); case AMDGPU_VM_MTYPE_WC: 
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); + return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_WC); case AMDGPU_VM_MTYPE_RW: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW); + return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_RW); case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); + return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_CC); case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); + return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC); default: - return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); + return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); } } @@ -1163,6 +1173,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } break; case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): /* Only local VRAM BOs or system memory on non-NUMA APUs * can be assumed to be local in their entirety. Choose * MTYPE_NC as safe fallback for all system memory BOs on @@ -1218,8 +1229,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } if (mtype != MTYPE_NC) - *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | - AMDGPU_PTE_MTYPE_VG10(mtype); + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype); + *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; } @@ -1254,7 +1265,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system * memory can use more efficient MTYPEs. */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) return; /* Only direct-mapped memory allows us to determine the NUMA node from @@ -1270,9 +1282,9 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, * and can also be overridden. */ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != - AMDGPU_PTE_MTYPE_VG10(MTYPE_NC) && + AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) && (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != - AMDGPU_PTE_MTYPE_VG10(MTYPE_UC)) { + AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) { dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n"); return; } @@ -1301,7 +1313,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, if (nid == local_node) { uint64_t old_flags = *flags; if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) == - AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { + AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) { unsigned int mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) @@ -1309,12 +1321,10 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, else if (amdgpu_mtype_local == 2) mtype_local = MTYPE_CC; - *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | - AMDGPU_PTE_MTYPE_VG10(mtype_local); + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local); } else if (adev->rev_id) { /* MTYPE_UC case */ - *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | - AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); } dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n", @@ -1450,7 +1460,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; adev->umc.active_mask = adev->aid_mask; adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; - adev->umc.channel_idx_tbl = &umc_v12_0_channel_idx_tbl[0][0][0]; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; break; @@ -1500,7 +1509,8 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - if 
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1545,7 +1555,8 @@ static int gmc_v9_0_early_init(void *handle) */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) adev->gmc.xgmi.supported = true; if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { @@ -1554,7 +1565,8 @@ static int gmc_v9_0_early_init(void *handle) adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { enum amdgpu_pkg_type pkg_type = adev->smuio.funcs->get_pkg_type(adev); /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present @@ -1716,6 +1728,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): default: adev->gmc.gart_size = 512ULL << 20; break; @@ -1758,7 +1771,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; - adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) | + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; if (!adev->gmc.real_vram_size) { @@ -1881,7 +1894,7 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, { enum amdgpu_memory_partition mode; u32 start_addr = 0, size; - int i; + int i, r, l; mode = gmc_v9_0_query_memory_partition(adev); @@ -1904,23 +1917,39 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, break; } - size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT; - size /= adev->gmc.num_mem_partitions; + /* Use NPS range info, if populated */ + r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges, + adev->gmc.num_mem_partitions); + if (!r) { + l = 0; + for (i = 1; i < adev->gmc.num_mem_partitions; ++i) { + if (mem_ranges[i].range.lpfn > + mem_ranges[i - 1].range.lpfn) + l = i; + } + + } else { + /* Fallback to sw based calculation */ + size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; + size /= adev->gmc.num_mem_partitions; + + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + mem_ranges[i].range.fpfn = start_addr; + mem_ranges[i].size = + ((u64)size << AMDGPU_GPU_PAGE_SHIFT); + mem_ranges[i].range.lpfn = start_addr + size - 1; + start_addr += size; + } - for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { - mem_ranges[i].range.fpfn = start_addr; - mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT); - mem_ranges[i].range.lpfn = start_addr + size - 1; - start_addr += size; + l = adev->gmc.num_mem_partitions - 1; } /* Adjust the last one */ - mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn = + mem_ranges[l].range.lpfn = (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; - mem_ranges[adev->gmc.num_mem_partitions - 1].size = + mem_ranges[l].size = adev->gmc.real_vram_size - - ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn - << AMDGPU_GPU_PAGE_SHIFT); + 
((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT); } static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) @@ -1970,7 +1999,8 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { gmc_v9_4_3_init_vram_info(adev); } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { @@ -2053,6 +2083,7 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), NUM_XCC(adev->gfx.xcc_mask)); @@ -2118,7 +2149,8 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_gmc_get_vbios_allocations(adev); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { r = gmc_v9_0_init_mem_ranges(adev); if (r) return r; @@ -2146,7 +2178,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->vm_manager.first_kfd_vmid = (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) ? + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) ? 3 : 8; @@ -2158,7 +2191,8 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2168,7 +2202,8 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) amdgpu_gmc_sysfs_fini(adev); amdgpu_gmc_ras_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 4db6bb73ead4..077c6d920e27 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -50,7 +50,8 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0) || - amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) + amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5)) return; if (!ring || !ring->funcs->emit_wreg) @@ -129,7 +130,8 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, { int data; - if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) { + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5)) { /* Default enabled */ *flags |= AMD_CG_SUPPORT_HDP_MGCG; return; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 2c02ae69883d..07984f7c3ae7 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -425,6 +425,8 @@ static const struct amd_ip_funcs 
iceland_ih_ip_funcs = { .soft_reset = iceland_ih_soft_reset, .set_clockgating_state = iceland_ih_set_clockgating_state, .set_powergating_state = iceland_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs iceland_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index ad4ad39f128f..18a761d6ef33 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -135,6 +135,34 @@ static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev, tmp = RREG32(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + + if (enable) { + /* Unset the CLEAR_OVERFLOW bit to make sure the next step + * is switching the bit from 0 to 1 + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + } + + /* Clear RB_OVERFLOW bit */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + } + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + } + /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); @@ -346,6 +374,21 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ @@ -549,8 +592,15 @@ static int ih_v6_0_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_0_init_register_offset(adev); @@ -748,6 +798,8 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { .set_clockgating_state = ih_v6_0_set_clockgating_state, .set_powergating_state = ih_v6_0_set_powergating_state, .get_clockgating_state = ih_v6_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index b8da0fc29378..2e0469feca1e 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -346,6 +346,21 @@ static int ih_v6_1_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ @@ -520,6 +535,12 @@ static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev) static int ih_v6_1_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + ret = amdgpu_irq_add_domain(adev); + if (ret) { + return ret; + } ih_v6_1_set_interrupt_funcs(adev); ih_v6_1_set_self_irq_funcs(adev); @@ -550,8 +571,15 @@ static int ih_v6_1_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_1_init_register_offset(adev); @@ -753,6 +781,8 @@ static const struct amd_ip_funcs ih_v6_1_ip_funcs = { .set_clockgating_state = ih_v6_1_set_clockgating_state, .set_powergating_state = ih_v6_1_set_powergating_state, .get_clockgating_state = ih_v6_1_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index 7aed96fa10a9..6852081fcff2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -346,6 +346,21 @@ static int ih_v7_0_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ @@ -546,8 +561,15 @@ static int ih_v7_0_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + 
adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v7_0_init_register_offset(adev); @@ -749,6 +771,8 @@ static const struct amd_ip_funcs ih_v7_0_ip_funcs = { .set_clockgating_state = ih_v7_0_set_clockgating_state, .set_powergating_state = ih_v7_0_set_powergating_state, .get_clockgating_state = ih_v7_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v7_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 3e91a8e42c21..6c1891889c4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -38,10 +38,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[45]; char ucode_prefix[30]; int err; const struct imu_firmware_header_v1_0 *imu_hdr; @@ -50,11 +50,10 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_imu.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; + imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; //adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version); @@ -75,8 +74,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) out: if (err) { dev_err(adev->dev, - "gfx11: Failed to load firmware \"%s\"\n", - fw_name); + "gfx11: Failed to load firmware \"%s_imu.bin\"\n", + ucode_prefix); amdgpu_ucode_release(&adev->gfx.imu_fw); } diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c new file mode 100644 index 000000000000..1341f0292031 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -0,0 +1,400 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_imu.h" +#include "amdgpu_dpm.h" + +#include "imu_v12_0.h" + +#include "gc/gc_12_0_0_offset.h" +#include "gc/gc_12_0_0_sh_mask.h" +#include "mmhub/mmhub_4_1_0_offset.h" + +MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin"); + +#define TRANSFER_RAM_MASK 0x001c0000 + +static int imu_v12_0_init_microcode(struct amdgpu_device *adev) +{ + char ucode_prefix[15]; + int err; + const struct imu_firmware_header_v1_0 *imu_hdr; + struct amdgpu_firmware_info *info = NULL; + + DRM_DEBUG("\n"); + + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + if (err) + goto out; + + imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; + adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I]; + info->ucode_id = AMDGPU_UCODE_ID_IMU_I; + info->fw = adev->gfx.imu_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D]; + info->ucode_id = AMDGPU_UCODE_ID_IMU_D; + info->fw = adev->gfx.imu_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE); + } + +out: + if (err) { + dev_err(adev->dev, + "gfx12: Failed to load firmware \"%s_imu.bin\"\n", + ucode_prefix); + amdgpu_ucode_release(&adev->gfx.imu_fw); + } + + return err; +} + +static int imu_v12_0_load_microcode(struct amdgpu_device *adev) +{ + const struct imu_firmware_header_v1_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.imu_fw) + return -EINVAL; + + hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; + + fw_data = (const __le32 *)(adev->gfx.imu_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version); + + fw_data = (const __le32 *)(adev->gfx.imu_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + + le32_to_cpu(hdr->imu_iram_ucode_size_bytes)); + fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version); + + return 0; +} + +static int imu_v12_0_wait_for_reset_status(struct amdgpu_device *adev) +{ + u32 imu_reg_val = 0; + int i; + + for (i = 0; i < adev->usec_timeout; i++) { + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL); + if ((imu_reg_val & 0x1f) == 0x1f) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "init imu: IMU start timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void imu_v12_0_setup(struct amdgpu_device *adev) +{ + u32 imu_reg_val; + + WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff); + WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff); + + if (adev->gfx.imu.mode == DEBUG_MODE) { + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16); + imu_reg_val |= 0x1; + 
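/* commit the debug-mode flag (bit 0) back to C2PMSG_16 */ +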
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); + + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10); + imu_reg_val |= 0x20010007; + WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val); + + } +} + +static int imu_v12_0_start(struct amdgpu_device *adev) +{ + u32 imu_reg_val; + + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL); + imu_reg_val &= 0xfffffffe; + WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val); + + if (adev->flags & AMD_IS_APU) + amdgpu_dpm_set_gfx_power_up_by_imu(adev); + + return imu_v12_0_wait_for_reset_status(adev); +} + +static const struct imu_rlc_ram_golden imu_rlc_ram_golden_12_0_1[] = { + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x1e4, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1X_PIPE_STEER, 0x1e4, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x1e4, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13571357, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x64206420, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x2460246, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x75317531, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xc0d41183, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_CHICKEN_BITS, 0x507d1c0, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_CHICKEN_BITS, 0x507d1c0, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_RB_WPTR_POLL_CNTL, 0x600100, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_CREDITS, 0x3f7fff, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_CREDITS, 0x3f7ebf, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE0, 0x2e00000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE1, 0x1a078, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE2, 0x0, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE0, 0x0, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE1, 0x12030, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE2, 0x0, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE0, 0x19041000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE0, 0x1e080000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_PRIORITY, 0x880, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_PRIORITY, 0x8880, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ARB_FINAL, 0x17, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ARB_FINAL, 0x77, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ENABLE, 0x00000001, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ENABLE, 0x00000001, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x20000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0c, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xfffff, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_MISC, 0x0091, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_MISC, 0x0091, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x00008500, 0x1c0000), + 
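/* the last column is an address mask ORed into the RLC RAM address; 0x1c0000 matches TRANSFER_RAM_MASK */ +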
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0x00880007, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regTD_CNTL, 0x00000001, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000001, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000100, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000101, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x08200545, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBMH_CP_PERFMON_CNTL, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCB_PERFCOUNTER0_SELECT1, 0x000fffff, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_DEBUG_2, 0x00020000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_CPC_DEBUG, 0x00500010, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x0000000f, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x0000600f, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x0000ffff, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x0003d000, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x0003d7ff, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 0, 0x1c0000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, 0, 0x1c0000) +}; + +static void program_imu_rlc_ram_old(struct amdgpu_device *adev, + const struct imu_rlc_ram_golden *regs, + const u32 array_size) +{ + const struct imu_rlc_ram_golden *entry; + u32 
reg, data; + int i; + + for (i = 0; i < array_size; ++i) { + entry = ®s[i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + reg |= entry->addr_mask; + data = entry->data; + if (entry->reg == regGCMC_VM_AGP_BASE) + data = 0x00ffffff; + else if (entry->reg == regGCMC_VM_AGP_TOP) + data = 0x0; + else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE) + data = adev->gmc.vram_start >> 24; + else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP) + data = adev->gmc.vram_end >> 24; + + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data); + } +} + +static u32 imu_v12_0_grbm_gfx_index_remap(struct amdgpu_device *adev, + u32 data, bool high) +{ + u32 val, inst_index; + + inst_index = REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX); + + if (high) + val = inst_index >> 5; + else + val = REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES) << 18 | + REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES) << 19 | + REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES) << 20 | + REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX) << 21 | + REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX) << 25 | + (inst_index & 0x1f); + + return val; +} + +static u32 imu_v12_init_gfxhub_settings(struct amdgpu_device *adev, + u32 reg, u32 data) +{ + if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_BASE)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_TOP)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_TOP); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_OFFSET)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BASE)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BOT)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_TOP)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_START); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_END); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_START)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_START); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_END)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_END); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB); + else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB)) + return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB); + else + return data; +} + +static void program_imu_rlc_ram(struct amdgpu_device 
*adev, + const u32 *regs, + const u32 array_size) +{ + u32 reg, data, val_h = 0, val_l = TRANSFER_RAM_MASK; + int i; + + if (array_size % 3) + return; + + for (i = 0; i < array_size; i += 3) { + reg = regs[i + 0]; + data = regs[i + 2]; + data = imu_v12_init_gfxhub_settings(adev, reg, data); + if (reg == SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX)) { + val_l = imu_v12_0_grbm_gfx_index_remap(adev, data, false); + val_h = imu_v12_0_grbm_gfx_index_remap(adev, data, true); + } else { + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, val_h); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg | val_l); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data); + } + } +} + +static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev) +{ + u32 reg_data, size = 0; + const u32 *data; + int r = -EINVAL; + + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2); + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (!r) + program_imu_rlc_ram(adev, data, (const u32)size); + else + program_imu_rlc_ram_old(adev, imu_rlc_ram_golden_12_0_1, + (const u32)ARRAY_SIZE(imu_rlc_ram_golden_12_0_1)); + break; + default: + BUG(); + break; + } + + //Indicate the latest entry + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0); + + reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX); + reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK; + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data); +} + +const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs = { + .init_microcode = imu_v12_0_init_microcode, + .load_microcode = imu_v12_0_load_microcode, + .setup_imu = imu_v12_0_setup, + .start_imu = imu_v12_0_start, + .program_rlc_ram = imu_v12_0_program_rlc_ram, + .wait_for_reset_status = imu_v12_0_wait_for_reset_status, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h new file mode 100644 index 000000000000..a1f50cb1aeab --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __IMU_V12_0_H__ +#define __IMU_V12_0_H__ + +extern const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs; + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c new file mode 100644 index 000000000000..aac107898bae --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include "amdgpu.h" +#include "isp_v4_1_0.h" + +static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = { + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16 +}; + +static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) +{ + struct amdgpu_device *adev = isp->adev; + u64 isp_base; + int int_idx; + int r; + + if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) + return -EINVAL; + + isp_base = adev->rmmio_base; + + isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + if (!isp->isp_cell) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); + goto failure; + } + + isp->isp_res = kcalloc(MAX_ISP410_INT_SRC + 1, sizeof(struct resource), + GFP_KERNEL); + if (!isp->isp_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL); + if (!isp->isp_pdata) { + r = -ENOMEM; + DRM_ERROR("%s: isp platform data alloc failed\n", __func__); + goto failure; + } + + /* initialize isp platform data */ + isp->isp_pdata->adev = (void *)adev; + isp->isp_pdata->asic_type = adev->asic_type; + isp->isp_pdata->base_rmmio_size = adev->rmmio_size; + + isp->isp_res[0].name = "isp_4_1_0_reg"; + isp->isp_res[0].flags = IORESOURCE_MEM; + isp->isp_res[0].start = isp_base; + isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; + + for (int_idx = 0; int_idx < MAX_ISP410_INT_SRC; int_idx++) { + isp->isp_res[int_idx + 1].name = "isp_4_1_0_irq"; + isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; + isp->isp_res[int_idx + 1].start = + amdgpu_irq_create_mapping(adev, 
isp_4_1_0_int_srcid[int_idx]); + isp->isp_res[int_idx + 1].end = + isp->isp_res[int_idx + 1].start; + } + + isp->isp_cell[0].name = "amd_isp_capture"; + isp->isp_cell[0].num_resources = MAX_ISP410_INT_SRC + 1; + isp->isp_cell[0].resources = &isp->isp_res[0]; + isp->isp_cell[0].platform_data = isp->isp_pdata; + isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + if (r) { + DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); + goto failure; + } + + return 0; + +failure: + + kfree(isp->isp_pdata); + kfree(isp->isp_res); + kfree(isp->isp_cell); + + return r; +} + +static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp) +{ + mfd_remove_devices(isp->parent); + + kfree(isp->isp_res); + kfree(isp->isp_cell); + kfree(isp->isp_pdata); + + return 0; +} + +static const struct isp_funcs isp_v4_1_0_funcs = { + .hw_init = isp_v4_1_0_hw_init, + .hw_fini = isp_v4_1_0_hw_fini, +}; + +void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp) +{ + isp->funcs = &isp_v4_1_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h new file mode 100644 index 000000000000..315f2822410c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#ifndef __ISP_V4_1_0_H__ +#define __ISP_V4_1_0_H__ + +#include "amdgpu_isp.h" + +#include "ivsrcid/isp/irqsrcs_isp_4_1.h" + +#define MAX_ISP410_INT_SRC 8 + +void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c new file mode 100644 index 000000000000..4e17fa03f7b5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include "amdgpu.h" +#include "isp_v4_1_1.h" + +static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15, + ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16 +}; + +static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) +{ + struct amdgpu_device *adev = isp->adev; + u64 isp_base; + int int_idx; + int r; + + if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) + return -EINVAL; + + isp_base = adev->rmmio_base; + + isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + if (!isp->isp_cell) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); + goto failure; + } + + isp->isp_res = kcalloc(MAX_ISP411_INT_SRC + 1, sizeof(struct resource), + GFP_KERNEL); + if (!isp->isp_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL); + if (!isp->isp_pdata) { + r = -ENOMEM; + DRM_ERROR("%s: isp platform data alloc failed\n", __func__); + goto failure; + } + + /* initialize isp platform data */ + isp->isp_pdata->adev = (void *)adev; + isp->isp_pdata->asic_type = adev->asic_type; + isp->isp_pdata->base_rmmio_size = adev->rmmio_size; + + isp->isp_res[0].name = "isp_4_1_1_reg"; + isp->isp_res[0].flags = IORESOURCE_MEM; + isp->isp_res[0].start = isp_base; + isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; + + for (int_idx = 0; int_idx < MAX_ISP411_INT_SRC; int_idx++) { + isp->isp_res[int_idx + 1].name = "isp_4_1_1_irq"; + isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; + isp->isp_res[int_idx + 1].start = + amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]); + isp->isp_res[int_idx + 1].end = + isp->isp_res[int_idx + 1].start; + } + + isp->isp_cell[0].name = "amd_isp_capture"; + isp->isp_cell[0].num_resources = MAX_ISP411_INT_SRC + 1; + isp->isp_cell[0].resources = &isp->isp_res[0]; + isp->isp_cell[0].platform_data = isp->isp_pdata; + isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + if (r) { + DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); + 
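/* the failure path below releases everything allocated so far */ +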
goto failure; + } + + return 0; + +failure: + + kfree(isp->isp_pdata); + kfree(isp->isp_res); + kfree(isp->isp_cell); + + return r; +} + +static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) +{ + mfd_remove_devices(isp->parent); + + kfree(isp->isp_res); + kfree(isp->isp_cell); + kfree(isp->isp_pdata); + + return 0; +} + +static const struct isp_funcs isp_v4_1_1_funcs = { + .hw_init = isp_v4_1_1_hw_init, + .hw_fini = isp_v4_1_1_hw_fini, +}; + +void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp) +{ + isp->funcs = &isp_v4_1_1_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h new file mode 100644 index 000000000000..dfb9522c9d6a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ + +#ifndef __ISP_V4_1_1_H__ +#define __ISP_V4_1_1_H__ + +#include "amdgpu_isp.h" + +#include "ivsrcid/isp/irqsrcs_isp_4_1.h" + +#define MAX_ISP411_INT_SRC 8 + +void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 77595e9622da..71f43a5c7f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -581,7 +581,6 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs; - DRM_INFO("JPEG decode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 1c8116d75f63..99adf3625657 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -131,16 +131,11 @@ static int jpeg_v2_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - r = amdgpu_ring_test_helper(ring); - if (!r) - DRM_INFO("JPEG decode initialized successfully.\n"); - - return r; + return amdgpu_ring_test_helper(ring); } /** @@ -759,6 +754,8 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_0_set_clockgating_state, .set_powergating_state = jpeg_v2_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { @@ -793,7 +790,6 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs; - DRM_INFO("JPEG decode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 99cd49ee8ef6..d8ef95c847c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -196,8 +196,6 @@ static int jpeg_v2_5_hw_init(void *handle) return r; } - DRM_INFO("JPEG decode initialized successfully.\n"); - return 0; } @@ -632,6 +630,8 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { @@ -652,6 +652,8 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { @@ -724,7 +726,6 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) else /* CHIP_ALDEBARAN */ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs; adev->jpeg.inst[i].ring_dec->me = i; - DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i); } } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a92481da60cd..31cfa3ce6528 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -146,18 +146,11 @@ static int jpeg_v3_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - - DRM_INFO("JPEG decode initialized successfully.\n"); - - return 0; + return amdgpu_ring_test_helper(ring); } /** @@ -557,6 +550,8 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v3_0_set_clockgating_state, .set_powergating_state = jpeg_v3_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { @@ -591,7 +586,6 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs; - DRM_INFO("JPEG decode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 88ea58d5c4ab..3dac8f259d7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -181,8 +181,6 @@ static int jpeg_v4_0_hw_init(void *handle) return r; } - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); - return 0; } @@ -719,6 +717,8 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_set_clockgating_state, .set_powergating_state = jpeg_v4_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { @@ -753,7 +753,6 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs; - DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 32caeb37cef9..ad524ddc9760 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -32,6 +32,9 @@ #include "vcn/vcn_4_0_3_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#define NORMALIZE_JPEG_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + enum jpeg_engin_status { UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, @@ -341,7 +344,6 @@ static int jpeg_v4_0_3_hw_init(void *handle) } } } - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); return 0; } @@ -622,6 +624,13 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); } +static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* JPEG engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through JPEG ring. 
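+ * Leaving the callback empty overrides the generic HDP flush, so no packet is emitted on this ring.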
+ */ +} + /** * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer * @@ -818,7 +827,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -859,7 +874,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1053,6 +1074,8 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, .set_powergating_state = jpeg_v4_0_3_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { @@ -1071,6 +1094,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_jpeg_dec_ring_test_ring, .test_ib = amdgpu_jpeg_dec_ring_test_ib, .insert_nop = jpeg_v4_0_3_dec_ring_nop, @@ -1098,7 +1122,6 @@ static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) adev->jpeg.inst[i].aid_id = jpeg_inst / adev->jpeg.num_inst_per_aid; } - DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index edf5bcdd2bc9..f96ac6bce526 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -187,11 +187,10 @@ static int jpeg_v4_0_5_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int r, i; + int i, r = 0; // TODO: Enable ring test with DPG support if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully under DPG Mode"); return 0; } @@ -205,9 +204,6 @@ static int jpeg_v4_0_5_hw_init(void *handle) return r; } - if (!r) - DRM_INFO("JPEG decode initialized successfully under SPG Mode\n"); - return 0; } @@ -762,6 +758,8 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state, .set_powergating_state = jpeg_v4_0_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { @@ -803,7 +801,6 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; adev->jpeg.inst[i].ring_dec->me = i; - DRM_DEV_INFO(adev->dev, "JPEG%d decode is enabled in VM mode\n", i); } } diff --git 
a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index e70200f97555..d694a276498a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -31,6 +31,7 @@ #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "jpeg_v5_0_0.h" static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); @@ -137,16 +138,14 @@ static int jpeg_v5_0_0_hw_init(void *handle) adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); + /* Skip ring test because pause DPG is not implemented. */ + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) + return 0; r = amdgpu_ring_test_helper(ring); if (r) return r; - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); - return 0; } @@ -241,7 +240,7 @@ static void jpeg_v5_0_0_enable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); } -static int jpeg_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev) +static int jpeg_v5_0_0_disable_power_gating(struct amdgpu_device *adev) { uint32_t data = 0; @@ -254,14 +253,10 @@ static int jpeg_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - /* keep the JPEG in static PG mode */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, - ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); - return 0; } -static int jpeg_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev) +static int jpeg_v5_0_0_enable_power_gating(struct amdgpu_device *adev) { /* enable anti hang mechanism */ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), @@ -279,6 +274,121 @@ static int jpeg_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev) return 0; } +static void jpeg_engine_5_0_0_dpg_clock_gating_mode(struct amdgpu_device *adev, + int inst_idx, uint8_t indirect) +{ + uint32_t data = 0; + + // JPEG disable CGC + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + + if (indirect) { + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect); + + // Turn on All JPEG clocks + data = 0; + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_GATE, data, indirect); + } else { + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect); + + // Turn on All JPEG clocks + data = 0; + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_GATE, data, indirect); + } +} + +/** + * jpeg_v5_0_0_start_dpg_mode - Jpeg start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start JPEG block with dpg mode + */ +static int jpeg_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec; + uint32_t reg_data = 0; + + jpeg_v5_0_0_enable_power_gating(adev); + + // enable dynamic power gating mode + reg_data = RREG32_SOC15(JPEG, inst_idx, 
regUVD_JPEG_POWER_STATUS); + reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); + + if (indirect) + adev->jpeg.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr; + + jpeg_engine_5_0_0_dpg_clock_gating_mode(adev, inst_idx, indirect); + + /* MJPEG global tiling registers */ + if (indirect) + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, indirect); + else + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 1); + + /* enable System Interrupt for JRBC */ + if (indirect) + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_SYS_INT_EN, + JPEG_SYS_INT_EN__DJRBC0_MASK, indirect); + else + WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_SYS_INT_EN, + JPEG_SYS_INT_EN__DJRBC0_MASK, 1); + + if (indirect) { + /* add nop to workaround PSP size check */ + ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipUVD_NO_OP, 0, indirect); + + amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0); + } + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v5_0_0_stop_dpg_mode - Jpeg stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop JPEG block with dpg mode + */ +static void jpeg_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t reg_data = 0; + + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); +} + /** * jpeg_v5_0_0_start - start JPEG block * @@ -294,8 +404,13 @@ static int jpeg_v5_0_0_start(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, true); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + r = jpeg_v5_0_0_start_dpg_mode(adev, 0, adev->jpeg.indirect_sram); + return r; + } + /* disable power gating */ - r = jpeg_v5_0_0_disable_static_power_gating(adev); + r = jpeg_v5_0_0_disable_power_gating(adev); if (r) return r; @@ -306,7 +421,6 @@ static int jpeg_v5_0_0_start(struct amdgpu_device *adev) WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - /* enable JMI channel */ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, ~UVD_JMI_CNTL__SOFT_RESET_MASK); @@ -316,6 +430,10 @@ static int jpeg_v5_0_0_start(struct amdgpu_device *adev) JPEG_SYS_INT_EN__DJRBC0_MASK, ~JPEG_SYS_INT_EN__DJRBC0_MASK); + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); 
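/* 0x1 | 0x2 = RB_NO_FETCH | RB_RPTR_WR_EN: hold ring fetches while the ring registers are programmed */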
WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, @@ -342,17 +460,22 @@ static int jpeg_v5_0_0_stop(struct amdgpu_device *adev) { int r; - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v5_0_0_stop_dpg_mode(adev, 0); + } else { - jpeg_v5_0_0_enable_clock_gating(adev); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); - /* enable power gating */ - r = jpeg_v5_0_0_enable_static_power_gating(adev); - if (r) - return r; + jpeg_v5_0_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v5_0_0_enable_power_gating(adev); + if (r) + return r; + } if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, false); @@ -513,6 +636,8 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state, .set_powergating_state = jpeg_v5_0_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { @@ -547,7 +672,6 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs; - DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h index bd348336b215..5abb96159814 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h @@ -24,6 +24,12 @@ #ifndef __JPEG_V5_0_0_H__ #define __JPEG_V5_0_0_H__ +#define vcnipJPEG_CGC_GATE 0x4160 +#define vcnipJPEG_CGC_CTRL 0x4161 +#define vcnipJPEG_SYS_INT_EN 0x4141 +#define vcnipUVD_NO_OP 0x0029 +#define vcnipJPEG_DEC_GFX10_ADDR_CONFIG 0x404A + extern const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block; #endif /* __JPEG_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c deleted file mode 100644 index 1e5ad1e08d2a..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ /dev/null @@ -1,1187 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include <linux/firmware.h> -#include <linux/module.h> -#include "amdgpu.h" -#include "soc15_common.h" -#include "nv.h" -#include "gc/gc_10_1_0_offset.h" -#include "gc/gc_10_1_0_sh_mask.h" -#include "gc/gc_10_1_0_default.h" -#include "v10_structs.h" -#include "mes_api_def.h" - -#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820 -#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1 -#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1 -#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1 - -MODULE_FIRMWARE("amdgpu/navi10_mes.bin"); -MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin"); -MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin"); - -static int mes_v10_1_hw_fini(void *handle); -static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev); - -#define MES_EOP_SIZE 2048 - -static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); - } -} - -static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring) -{ - return *ring->rptr_cpu_addr; -} - -static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring) -{ - u64 wptr; - - if (ring->use_doorbell) - wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); - else - BUG(); - return wptr; -} - -static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = { - .type = AMDGPU_RING_TYPE_MES, - .align_mask = 1, - .nop = 0, - .support_64bit_ptrs = true, - .get_rptr = mes_v10_1_ring_get_rptr, - .get_wptr = mes_v10_1_ring_get_wptr, - .set_wptr = mes_v10_1_ring_set_wptr, - .insert_nop = amdgpu_ring_insert_nop, -}; - -static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) -{ - int ndw = size / 4; - signed long r; - union MESAPI__ADD_QUEUE *x_pkt = pkt; - struct MES_API_STATUS *api_status; - struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; - unsigned long flags; - - BUG_ON(size % 4 != 0); - - spin_lock_irqsave(&mes->ring_lock, flags); - if (amdgpu_ring_alloc(ring, ndw)) { - spin_unlock_irqrestore(&mes->ring_lock, flags); - return -ENOMEM; - } - - api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; - api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; - - amdgpu_ring_write_multiple(ring, pkt, ndw); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); - - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); - - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, - adev->usec_timeout); - if (r < 1) { - DRM_ERROR("MES failed to response msg=%d\n", - x_pkt->header.opcode); - - while (halt_if_hws_hang) - schedule(); - - return -ETIMEDOUT; - } - - return 0; -} - -static int convert_to_mes_queue_type(int queue_type) -{ - if (queue_type == AMDGPU_RING_TYPE_GFX) - return MES_QUEUE_TYPE_GFX; - else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) - return MES_QUEUE_TYPE_COMPUTE; - else if (queue_type == AMDGPU_RING_TYPE_SDMA) - return MES_QUEUE_TYPE_SDMA; - else - BUG(); - return -1; -} - -static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, - struct mes_add_queue_input *input) -{ - struct amdgpu_device *adev = mes->adev; - union MESAPI__ADD_QUEUE mes_add_queue_pkt; - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; - - memset(&mes_add_queue_pkt, 0, 
sizeof(mes_add_queue_pkt)); - - mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; - mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_add_queue_pkt.process_id = input->process_id; - mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; - mes_add_queue_pkt.process_va_start = input->process_va_start; - mes_add_queue_pkt.process_va_end = input->process_va_end; - mes_add_queue_pkt.process_quantum = input->process_quantum; - mes_add_queue_pkt.process_context_addr = input->process_context_addr; - mes_add_queue_pkt.gang_quantum = input->gang_quantum; - mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; - mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; - mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; - mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_add_queue_pkt.mqd_addr = input->mqd_addr; - mes_add_queue_pkt.wptr_addr = input->wptr_addr; - mes_add_queue_pkt.queue_type = - convert_to_mes_queue_type(input->queue_type); - mes_add_queue_pkt.paging = input->paging; - mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl; - mes_add_queue_pkt.gws_base = input->gws_base; - mes_add_queue_pkt.gws_size = input->gws_size; - mes_add_queue_pkt.trap_handler_addr = input->tba_addr; - - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), - offsetof(union MESAPI__ADD_QUEUE, api_status)); -} - -static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, - struct mes_remove_queue_input *input) -{ - union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; - - memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); - - mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; - mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; - - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - -static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, - struct mes_unmap_legacy_queue_input *input) -{ - union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; - - memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); - - mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; - mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_remove_queue_pkt.gang_context_addr = 0; - - mes_remove_queue_pkt.pipe_id = input->pipe_id; - mes_remove_queue_pkt.queue_id = input->queue_id; - - if (input->action == PREEMPT_QUEUES_NO_UNMAP) { - mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; - mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; - mes_remove_queue_pkt.tf_data = - lower_32_bits(input->trail_fence_data); - } else { - if (input->queue_type == AMDGPU_RING_TYPE_GFX) - mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1; - else - mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; - } - - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - -static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, - struct 
mes_suspend_gang_input *input) -{ - return 0; -} - -static int mes_v10_1_resume_gang(struct amdgpu_mes *mes, - struct mes_resume_gang_input *input) -{ - return 0; -} - -static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes) -{ - union MESAPI__QUERY_MES_STATUS mes_status_pkt; - - memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); - - mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; - mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_status_pkt, sizeof(mes_status_pkt), - offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); -} - -static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) -{ - int i; - struct amdgpu_device *adev = mes->adev; - union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; - - memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); - - mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; - mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; - - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; - - for (i = 0; i < 5; i++) { - mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; - mes_set_hw_res_pkt.mmhub_base[i] = - adev->reg_offset[MMHUB_HWIP][0][i]; - mes_set_hw_res_pkt.osssys_base[i] = - adev->reg_offset[OSSSYS_HWIP][0][i]; - } - - mes_set_hw_res_pkt.disable_reset = 1; - mes_set_hw_res_pkt.disable_mes_log = 1; - mes_set_hw_res_pkt.use_different_vmid_compute = 1; - - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), - offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); -} - -static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes) -{ - struct amdgpu_device *adev = mes->adev; - uint32_t data; - - data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1); - data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << - CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data); - - data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2); - data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << - CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data); - - data = 
RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3); - data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << - CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data); - - data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4); - data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << - CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data); - - data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5); - data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << - CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data); - - data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; - WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data); -} - -static const struct amdgpu_mes_funcs mes_v10_1_funcs = { - .add_hw_queue = mes_v10_1_add_hw_queue, - .remove_hw_queue = mes_v10_1_remove_hw_queue, - .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue, - .suspend_gang = mes_v10_1_suspend_gang, - .resume_gang = mes_v10_1_resume_gang, -}; - -static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) -{ - int r; - const struct mes_firmware_header_v1_0 *mes_hdr; - const __le32 *fw_data; - unsigned fw_size; - - mes_hdr = (const struct mes_firmware_header_v1_0 *) - adev->mes.fw[pipe]->data; - - fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + - le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); - fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); - - r = amdgpu_bo_create_reserved(adev, fw_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &adev->mes.ucode_fw_obj[pipe], - &adev->mes.ucode_fw_gpu_addr[pipe], - (void **)&adev->mes.ucode_fw_ptr[pipe]); - if (r) { - dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); - return r; - } - - memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size); - - amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]); - amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]); - - return 0; -} - -static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) -{ - int r; - const struct mes_firmware_header_v1_0 *mes_hdr; - const __le32 *fw_data; - unsigned fw_size; - - mes_hdr = (const struct mes_firmware_header_v1_0 *) - adev->mes.fw[pipe]->data; - - fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + - le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); - fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); - - r = amdgpu_bo_create_reserved(adev, fw_size, - 64 * 1024, AMDGPU_GEM_DOMAIN_GTT, - &adev->mes.data_fw_obj[pipe], - &adev->mes.data_fw_gpu_addr[pipe], - (void **)&adev->mes.data_fw_ptr[pipe]); - if (r) { - dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); - return r; - } - - memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size); - - 
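For reference, the allocate/copy/release sequence used by these MES loaders is the standard amdgpu pattern for staging a firmware image into a GPU-visible kernel buffer: amdgpu_bo_create_reserved() creates, pins and CPU-maps the BO in one call, the ucode is memcpy'd in, and the mapping and reservation are dropped while the BO stays pinned. A minimal hedged sketch of that pattern (the helper name is illustrative, not a driver symbol):

static int example_stage_fw_blob(struct amdgpu_device *adev,
				 const void *blob, unsigned int size,
				 struct amdgpu_bo **bo, u64 *gpu_addr,
				 void **cpu_ptr)
{
	int r;

	/* create, pin and CPU-map a kernel BO in GTT in one call */
	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      bo, gpu_addr, cpu_ptr);
	if (r)
		return r;

	memcpy(*cpu_ptr, blob, size);	/* copy the ucode image */

	/* drop the CPU mapping and reservation; the BO stays pinned */
	amdgpu_bo_kunmap(*bo);
	amdgpu_bo_unreserve(*bo);
	return 0;
}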
amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]); - amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]); - - return 0; -} - -static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) -{ - amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe], - &adev->mes.data_fw_gpu_addr[pipe], - (void **)&adev->mes.data_fw_ptr[pipe]); - - amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe], - &adev->mes.ucode_fw_gpu_addr[pipe], - (void **)&adev->mes.ucode_fw_ptr[pipe]); -} - -static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable) -{ - uint32_t pipe, data = 0; - - if (enable) { - data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, - MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0); - WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); - - mutex_lock(&adev->srbm_mutex); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - - nv_grbm_select(adev, 3, pipe, 0, 0); - WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START, - (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2); - } - nv_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */ - data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL); - data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL, - BYPASS_UNCACHED, 0); - WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data); - - /* unhalt MES and activate pipe0 */ - data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - adev->enable_mes_kiq ? 1 : 0); - WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); - udelay(100); - } else { - data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); - data = REG_SET_FIELD(data, CP_MES_CNTL, - MES_INVALIDATE_ICACHE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - adev->enable_mes_kiq ? 
1 : 0); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); - WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); - } -} - -/* This function is for backdoor MES firmware */ -static int mes_v10_1_load_microcode(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) -{ - int r; - uint32_t data; - - mes_v10_1_enable(adev, false); - - if (!adev->mes.fw[pipe]) - return -EINVAL; - - r = mes_v10_1_allocate_ucode_buffer(adev, pipe); - if (r) - return r; - - r = mes_v10_1_allocate_ucode_data_buffer(adev, pipe); - if (r) { - mes_v10_1_free_ucode_buffers(adev, pipe); - return r; - } - - WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0); - - mutex_lock(&adev->srbm_mutex); - /* me=3, pipe=0, queue=0 */ - nv_grbm_select(adev, 3, pipe, 0, 0); - - /* set ucode start address */ - WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START, - (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2); - - /* set ucode fimrware address */ - WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO, - lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe])); - WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI, - upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe])); - - /* set ucode instruction cache boundary to 2M-1 */ - WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF); - - /* set ucode data firmware address */ - WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO, - lower_32_bits(adev->mes.data_fw_gpu_addr[pipe])); - WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI, - upper_32_bits(adev->mes.data_fw_gpu_addr[pipe])); - - /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ - WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF); - - /* invalidate ICACHE */ - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(10, 3, 0): - data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid); - break; - default: - data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); - break; - } - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(10, 3, 0): - WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data); - break; - default: - WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); - break; - } - - /* prime the ICACHE. 
*/ - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(10, 3, 0): - data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid); - break; - default: - data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); - break; - } - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(10, 3, 0): - WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data); - break; - default: - WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); - break; - } - - nv_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - return 0; -} - -static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) -{ - int r; - u32 *eop; - - r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &adev->mes.eop_gpu_obj[pipe], - &adev->mes.eop_gpu_addr[pipe], - (void **)&eop); - if (r) { - dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); - return r; - } - - memset(eop, 0, adev->mes.eop_gpu_obj[pipe]->tbo.base.size); - - amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]); - amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]); - - return 0; -} - -static int mes_v10_1_mqd_init(struct amdgpu_ring *ring) -{ - struct v10_compute_mqd *mqd = ring->mqd_ptr; - uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; - uint32_t tmp; - - memset(mqd, 0, sizeof(*mqd)); - - mqd->header = 0xC0310800; - mqd->compute_pipelinestat_enable = 0x00000001; - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - mqd->compute_misc_reserved = 0x00000003; - - eop_base_addr = ring->eop_gpu_addr >> 8; - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = mmCP_HQD_EOP_CONTROL_DEFAULT; - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MES_EOP_SIZE / 4) - 1)); - - mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); - mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); - mqd->cp_hqd_eop_control = tmp; - - /* disable the queue if it's active */ - ring->wptr = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr_lo = 0; - mqd->cp_hqd_pq_wptr_hi = 0; - - /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); - - /* set MQD vmid to 0 */ - tmp = mmCP_MQD_CONTROL_DEFAULT; - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - mqd->cp_mqd_control = tmp; - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - - /* set the wb address whether it's enabled or not */ - wb_gpu_addr = ring->rptr_gpu_addr; - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = ring->wptr_gpu_addr; - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = mmCP_HQD_PQ_CONTROL_DEFAULT; - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, - (order_base_2(ring->ring_size / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - 
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); -#ifdef __BIG_ENDIAN - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); -#endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1); - mqd->cp_hqd_pq_control = tmp; - - /* enable doorbell? */ - tmp = 0; - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - } - else - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 0); - mqd->cp_hqd_pq_doorbell_control = tmp; - - mqd->cp_hqd_vmid = 0; - /* activate the queue */ - mqd->cp_hqd_active = 1; - mqd->cp_hqd_persistent_state = mmCP_HQD_PERSISTENT_STATE_DEFAULT; - mqd->cp_hqd_ib_control = mmCP_HQD_IB_CONTROL_DEFAULT; - mqd->cp_hqd_iq_timer = mmCP_HQD_IQ_TIMER_DEFAULT; - mqd->cp_hqd_quantum = mmCP_HQD_QUANTUM_DEFAULT; - - tmp = mmCP_HQD_GFX_CONTROL_DEFAULT; - tmp = REG_SET_FIELD(tmp, CP_HQD_GFX_CONTROL, DB_UPDATED_MSG_EN, 1); - /* offset: 184 - this is used for CP_HQD_GFX_CONTROL */ - mqd->cp_hqd_suspend_cntl_stack_offset = tmp; - - amdgpu_device_flush_hdp(ring->adev, NULL); - return 0; -} - -#if 0 -static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring) -{ - struct v10_compute_mqd *mqd = ring->mqd_ptr; - struct amdgpu_device *adev = ring->adev; - uint32_t data = 0; - - mutex_lock(&adev->srbm_mutex); - nv_grbm_select(adev, 3, ring->pipe, 0, 0); - - /* set CP_HQD_VMID.VMID = 0. 
*/ - data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID); - data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data); - - /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ - data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data); - - /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set CP_MQD_CONTROL.VMID=0 */ - data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); - data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0); - - /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* set CP_HQD_PQ_CONTROL */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); - - /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, - mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* set CP_HQD_PQ_DOORBELL_CONTROL */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - - /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); - - /* set CP_HQD_ACTIVE.ACTIVE=1 */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - - nv_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} -#endif - -static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; - int r; - - if (!kiq->pmf || !kiq->pmf->kiq_map_queues) - return -EINVAL; - - r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); - - return amdgpu_ring_test_helper(kiq_ring); -} - -static int mes_v10_1_queue_init(struct amdgpu_device *adev) -{ - int r; - - r = mes_v10_1_mqd_init(&adev->mes.ring); - if (r) - return r; - - r = mes_v10_1_kiq_enable_queue(adev); - if (r) - return r; - - return 0; -} - -static int mes_v10_1_ring_init(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring; - - ring = &adev->mes.ring; - - ring->funcs = &mes_v10_1_ring_funcs; - - ring->me = 3; - ring->pipe = 0; - ring->queue = 0; - - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; - ring->no_scheduler = true; - sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); - - return amdgpu_ring_init(adev, ring, 1024, NULL, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); -} - -static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring; - - spin_lock_init(&adev->gfx.kiq[0].ring_lock); - - ring = &adev->gfx.kiq[0].ring; - - ring->me = 3; - ring->pipe = 1; - ring->queue = 0; - - ring->adev = NULL; - 
ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE]; - ring->no_scheduler = true; - sprintf(ring->name, "mes_kiq_%d.%d.%d", - ring->me, ring->pipe, ring->queue); - - return amdgpu_ring_init(adev, ring, 1024, NULL, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); -} - -static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) -{ - int r, mqd_size = sizeof(struct v10_compute_mqd); - struct amdgpu_ring *ring; - - if (pipe == AMDGPU_MES_KIQ_PIPE) - ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; - else - BUG(); - - if (ring->mqd_obj) - return 0; - - r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, &ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); - return r; - } - memset(ring->mqd_ptr, 0, mqd_size); - - /* prepare MQD backup */ - adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->mes.mqd_backup[pipe]) { - dev_warn(adev->dev, - "no memory to create MQD backup for ring %s\n", - ring->name); - return -ENOMEM; - } - - return 0; -} - -static int mes_v10_1_sw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int pipe, r; - - adev->mes.funcs = &mes_v10_1_funcs; - adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init; - - r = amdgpu_mes_init(adev); - if (r) - return r; - - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - - r = mes_v10_1_allocate_eop_buf(adev, pipe); - if (r) - return r; - - r = mes_v10_1_mqd_sw_init(adev, pipe); - if (r) - return r; - } - - if (adev->enable_mes_kiq) { - r = mes_v10_1_kiq_ring_init(adev); - if (r) - return r; - } - - r = mes_v10_1_ring_init(adev); - if (r) - return r; - - return 0; -} - -static int mes_v10_1_sw_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int pipe; - - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - kfree(adev->mes.mqd_backup[pipe]); - - amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe], - &adev->mes.eop_gpu_addr[pipe], - NULL); - amdgpu_ucode_release(&adev->mes.fw[pipe]); - } - - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); - - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); - - amdgpu_mes_fini(adev); - return 0; -} - -static void mes_v10_1_kiq_setting(struct amdgpu_ring *ring) -{ - uint32_t tmp; - struct amdgpu_device *adev = ring->adev; - - /* tell RLC which is KIQ queue */ - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(10, 3, 0): - case IP_VERSION(10, 3, 2): - case IP_VERSION(10, 3, 1): - case IP_VERSION(10, 3, 4): - tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); - tmp &= 0xffffff00; - tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); - break; - default: - tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); - tmp 
&= 0xffffff00; - tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - break; - } -} - -static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev) -{ - int r = 0; - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - r = mes_v10_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE); - if (r) { - DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); - return r; - } - - r = mes_v10_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (r) { - DRM_ERROR("failed to load MES fw, r=%d\n", r); - return r; - } - } - - mes_v10_1_enable(adev, true); - - mes_v10_1_kiq_setting(&adev->gfx.kiq[0].ring); - - r = mes_v10_1_queue_init(adev); - if (r) - goto failure; - - return r; - -failure: - mes_v10_1_hw_fini(adev); - return r; -} - -static int mes_v10_1_hw_init(void *handle) -{ - int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (!adev->enable_mes_kiq) { - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - r = mes_v10_1_load_microcode(adev, - AMDGPU_MES_SCHED_PIPE); - if (r) { - DRM_ERROR("failed to MES fw, r=%d\n", r); - return r; - } - } - - mes_v10_1_enable(adev, true); - } - - r = mes_v10_1_queue_init(adev); - if (r) - goto failure; - - r = mes_v10_1_set_hw_resources(&adev->mes); - if (r) - goto failure; - - mes_v10_1_init_aggregated_doorbell(&adev->mes); - - r = mes_v10_1_query_sched_status(&adev->mes); - if (r) { - DRM_ERROR("MES is busy\n"); - goto failure; - } - - /* - * Disable KIQ ring usage from the driver once MES is enabled. - * MES uses KIQ ring exclusively so driver cannot access KIQ ring - * with MES enabled. - */ - adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; - - return 0; - -failure: - mes_v10_1_hw_fini(adev); - return r; -} - -static int mes_v10_1_hw_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->mes.ring.sched.ready = false; - - mes_v10_1_enable(adev, false); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); - mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE); - } - - return 0; -} - -static int mes_v10_1_suspend(void *handle) -{ - int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - r = amdgpu_mes_suspend(adev); - if (r) - return r; - - return mes_v10_1_hw_fini(adev); -} - -static int mes_v10_1_resume(void *handle) -{ - int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - r = mes_v10_1_hw_init(adev); - if (r) - return r; - - return amdgpu_mes_resume(adev); -} - -static int mes_v10_0_early_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int pipe, r; - - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = amdgpu_mes_init_microcode(adev, pipe); - if (r) - return r; - } - - return 0; -} - -static int mes_v10_0_late_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (!amdgpu_in_reset(adev)) - amdgpu_mes_self_test(adev); - - return 0; -} - -static const struct amd_ip_funcs mes_v10_1_ip_funcs = { - .name = "mes_v10_1", - .early_init = mes_v10_0_early_init, - .late_init = mes_v10_0_late_init, - .sw_init = mes_v10_1_sw_init, - .sw_fini = mes_v10_1_sw_fini, - .hw_init = mes_v10_1_hw_init, - .hw_fini = mes_v10_1_hw_fini, - .suspend = mes_v10_1_suspend, - .resume = mes_v10_1_resume, -}; - 
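The callback table just above and the version descriptor just below are the boilerplate every amdgpu IP block carries: an amd_ip_funcs table of lifecycle callbacks plus an amdgpu_ip_block_version binding that table to a block type and version. The version descriptor was consumed during IP discovery; a hedged sketch of how such a block was wired up before this removal (the function name and the GC version checked are illustrative, see amdgpu_discovery.c for the real selection logic):

static int example_set_mes_ip_blocks(struct amdgpu_device *adev)
{
	/* pick the MES implementation that matches the GC hardware IP */
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(10, 1, 10):	/* e.g. Navi10 */
		return amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
	default:
		return 0;
	}
}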
-const struct amdgpu_ip_block_version mes_v10_1_ip_block = { - .type = AMD_IP_BLOCK_TYPE_MES, - .major = 10, - .minor = 1, - .rev = 0, - .funcs = &mes_v10_1_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 072c478665ad..8ce51b9236c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -51,8 +51,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin"); - +static int mes_v11_0_hw_init(void *handle); static int mes_v11_0_hw_fini(void *handle); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); @@ -100,18 +102,79 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { .insert_nop = amdgpu_ring_insert_nop, }; +static const char *mes_v11_0_opcodes[] = { + "SET_HW_RSRC", + "SET_SCHEDULING_CONFIG", + "ADD_QUEUE", + "REMOVE_QUEUE", + "PERFORM_YIELD", + "SET_GANG_PRIORITY_LEVEL", + "SUSPEND", + "RESUME", + "RESET", + "SET_LOG_BUFFER", + "CHANGE_GANG_PRORITY", + "QUERY_SCHEDULER_STATUS", + "PROGRAM_GDS", + "SET_DEBUG_VMID", + "MISC", + "UPDATE_ROOT_PAGE_TABLE", + "AMD_LOG", + "unused", + "unused", + "SET_HW_RSRC_1", +}; + +static const char *mes_v11_0_misc_opcodes[] = { + "WRITE_REG", + "INV_GART", + "QUERY_STATUS", + "READ_REG", + "WAIT_REG_MEM", + "SET_SHADER_DEBUGGER", +}; + +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes)) + op_str = mes_v11_0_opcodes[x_pkt->header.opcode]; + + return op_str; +} + +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if ((x_pkt->header.opcode == MES_SCH_API_MISC) && + (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes))) + op_str = mes_v11_0_misc_opcodes[x_pkt->opcode]; + + return op_str; +} + static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, void *pkt, int size, int api_status_off) { - int ndw = size / 4; - signed long r; - union MESAPI__ADD_QUEUE *x_pkt = pkt; - struct MES_API_STATUS *api_status; + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; + struct MES_API_STATUS *api_status; + union MESAPI__MISC *x_pkt = pkt; + const char *op_str, *misc_op_str; unsigned long flags; - signed long timeout = adev->usec_timeout; + u64 status_gpu_addr; + u32 status_offset; + u64 *status_ptr; + signed long r; + int ret; + + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; if (amdgpu_emu_mode) { timeout *= 100; @@ -119,37 +182,82 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ timeout = 15 * 600 * 1000; } - BUG_ON(size % 4 != 0); + + ret = amdgpu_device_wb_get(adev, &status_offset); + if (ret) + return ret; + + status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); + status_ptr = (u64 *)&adev->wb.wb[status_offset]; + *status_ptr = 0; spin_lock_irqsave(&mes->ring_lock, flags); - if (amdgpu_ring_alloc(ring, ndw)) { - spin_unlock_irqrestore(&mes->ring_lock, flags); - return -ENOMEM; - } + r = amdgpu_ring_alloc(ring, 
(size + sizeof(mes_status_pkt)) / 4); + if (r) + goto error_unlock_free; api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; - api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; + api_status->api_completion_fence_addr = status_gpu_addr; + api_status->api_completion_fence_value = 1; + + amdgpu_ring_write_multiple(ring, pkt, size / 4); + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_status_pkt.api_status.api_completion_fence_addr = + ring->fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = + ++ring->fence_drv.sync_seq; + + amdgpu_ring_write_multiple(ring, &mes_status_pkt, + sizeof(mes_status_pkt) / 4); - amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&mes->ring_lock, flags); - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); + op_str = mes_v11_0_get_op_string(x_pkt); + misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, - timeout); - if (r < 1) { - DRM_ERROR("MES failed to response msg=%d\n", - x_pkt->header.opcode); + if (misc_op_str) + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, + misc_op_str); + else if (op_str) + dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + else + dev_dbg(adev->dev, "MES msg=%d was emitted\n", + x_pkt->header.opcode); + + r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + if (r < 1 || !*status_ptr) { + + if (misc_op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", + op_str, misc_op_str); + else if (op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s\n", + op_str); + else + dev_err(adev->dev, "MES failed to respond to msg=%d\n", + x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto error_wb_free; } + amdgpu_device_wb_free(adev, status_offset); return 0; + +error_unlock_free: + spin_unlock_irqrestore(&mes->ring_lock, flags); + +error_wb_free: + amdgpu_device_wb_free(adev, status_offset); + return r; } static int convert_to_mes_queue_type(int queue_type) @@ -242,6 +350,31 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input) +{ + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.pipe_id = input->pipe_id; + mes_add_queue_pkt.queue_id = input->queue_id; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.map_legacy_kq = 1; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct 
mes_unmap_legacy_queue_input *input) { @@ -411,17 +544,51 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } +static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) +{ + int size = 128 * PAGE_SIZE; + int ret = 0; + struct amdgpu_device *adev = mes->adev; + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_pkt.enable_mes_info_ctx = 1; + + ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &mes->resource_1, + &mes->resource_1_gpu_addr, + &mes->resource_1_addr); + if (ret) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret); + return ret; + } + + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr; + mes_set_hw_res_pkt.mes_info_ctx_size = mes->resource_1->tbo.base.size; + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, + .map_legacy_queue = mes_v11_0_map_legacy_queue, .unmap_legacy_queue = mes_v11_0_unmap_legacy_queue, .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, @@ -554,7 +721,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) if (amdgpu_emu_mode) msleep(100); else - udelay(50); + udelay(500); } else { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); @@ -1150,6 +1317,10 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + r = mes_v11_0_hw_init(adev); + if (r) + goto failure; + return r; failure: @@ -1179,6 +1350,9 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->mes.ring.sched.ready) + goto out; + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v11_0_load_microcode(adev, @@ -1200,12 +1374,21 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } + } + r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); goto failure; } +out: /* * Disable KIQ ring usage from the driver once MES is enabled. 
* MES uses KIQ ring exclusively so driver cannot access KIQ ring @@ -1223,6 +1406,11 @@ failure: static int mes_v11_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr, + &adev->mes.resource_1_addr); + } return 0; } @@ -1288,6 +1476,8 @@ static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .hw_fini = mes_v11_0_hw_fini, .suspend = mes_v11_0_suspend, .resume = mes_v11_0_resume, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version mes_v11_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c new file mode 100644 index 000000000000..c9f74231ad59 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -0,0 +1,1562 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "soc15_common.h" +#include "soc21.h" +#include "gc/gc_12_0_0_offset.h" +#include "gc/gc_12_0_0_sh_mask.h" +#include "gc/gc_11_0_0_default.h" +#include "v12_structs.h" +#include "mes_v12_api_def.h" + +MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_0_uni_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin"); + +static int mes_v12_0_hw_init(void *handle); +static int mes_v12_0_hw_fini(void *handle); +static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev); +static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev); + +#define MES_EOP_SIZE 2048 + +static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); + } +} + +static u64 mes_v12_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + return *ring->rptr_cpu_addr; +} + +static u64 mes_v12_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + u64 wptr; + + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); + else + BUG(); + return wptr; +} + +static const struct amdgpu_ring_funcs mes_v12_0_ring_funcs = { + .type = AMDGPU_RING_TYPE_MES, + .align_mask = 1, + .nop = 0, + .support_64bit_ptrs = true, + .get_rptr = mes_v12_0_ring_get_rptr, + .get_wptr = mes_v12_0_ring_get_wptr, + .set_wptr = mes_v12_0_ring_set_wptr, + .insert_nop = amdgpu_ring_insert_nop, +}; + +static const char *mes_v12_0_opcodes[] = { + "SET_HW_RSRC", + "SET_SCHEDULING_CONFIG", + "ADD_QUEUE", + "REMOVE_QUEUE", + "PERFORM_YIELD", + "SET_GANG_PRIORITY_LEVEL", + "SUSPEND", + "RESUME", + "RESET", + "SET_LOG_BUFFER", + "CHANGE_GANG_PRORITY", + "QUERY_SCHEDULER_STATUS", + "unused", + "SET_DEBUG_VMID", + "MISC", + "UPDATE_ROOT_PAGE_TABLE", + "AMD_LOG", + "SET_SE_MODE", + "SET_GANG_SUBMIT", + "SET_HW_RSRC_1", +}; + +static const char *mes_v12_0_misc_opcodes[] = { + "WRITE_REG", + "INV_GART", + "QUERY_STATUS", + "READ_REG", + "WAIT_REG_MEM", + "SET_SHADER_DEBUGGER", + "NOTIFY_WORK_ON_UNMAPPED_QUEUE", + "NOTIFY_TO_UNMAP_PROCESSES", +}; + +static const char *mes_v12_0_get_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_0_opcodes)) + op_str = mes_v12_0_opcodes[x_pkt->header.opcode]; + + return op_str; +} + +static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if ((x_pkt->header.opcode == MES_SCH_API_MISC) && + (x_pkt->opcode < ARRAY_SIZE(mes_v12_0_misc_opcodes))) + op_str = mes_v12_0_misc_opcodes[x_pkt->opcode]; + + return op_str; +} + +static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, + void *pkt, int size, + int api_status_off) +{ + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + signed long timeout = 3000000; /* 3000 ms */ + struct amdgpu_device *adev = mes->adev; + struct amdgpu_ring *ring = &mes->ring; + struct MES_API_STATUS *api_status; + union MESAPI__MISC *x_pkt = pkt; + const char *op_str, *misc_op_str; + unsigned long flags; + u64 status_gpu_addr; + u32 status_offset; + u64 *status_ptr; + signed long r; + int ret; + + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; + + if (amdgpu_emu_mode) { + timeout *= 100; 
+ } else if (amdgpu_sriov_vf(adev)) { + /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ + timeout = 15 * 600 * 1000; + } + + ret = amdgpu_device_wb_get(adev, &status_offset); + if (ret) + return ret; + + status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); + status_ptr = (u64 *)&adev->wb.wb[status_offset]; + *status_ptr = 0; + + spin_lock_irqsave(&mes->ring_lock, flags); + r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); + if (r) + goto error_unlock_free; + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = status_gpu_addr; + api_status->api_completion_fence_value = 1; + + amdgpu_ring_write_multiple(ring, pkt, size / 4); + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_status_pkt.api_status.api_completion_fence_addr = + ring->fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = + ++ring->fence_drv.sync_seq; + + amdgpu_ring_write_multiple(ring, &mes_status_pkt, + sizeof(mes_status_pkt) / 4); + + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&mes->ring_lock, flags); + + op_str = mes_v12_0_get_op_string(x_pkt); + misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); + + if (misc_op_str) + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, + misc_op_str); + else if (op_str) + dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + else + dev_dbg(adev->dev, "MES msg=%d was emitted\n", + x_pkt->header.opcode); + + r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + if (r < 1 || !*status_ptr) { + + if (misc_op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", + op_str, misc_op_str); + else if (op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s\n", + op_str); + else + dev_err(adev->dev, "MES failed to respond to msg=%d\n", + x_pkt->header.opcode); + + while (halt_if_hws_hang) + schedule(); + + r = -ETIMEDOUT; + goto error_wb_free; + } + + amdgpu_device_wb_free(adev, status_offset); + return 0; + +error_unlock_free: + spin_unlock_irqrestore(&mes->ring_lock, flags); + +error_wb_free: + amdgpu_device_wb_free(adev, status_offset); + return r; +} + +static int convert_to_mes_queue_type(int queue_type) +{ + if (queue_type == AMDGPU_RING_TYPE_GFX) + return MES_QUEUE_TYPE_GFX; + else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) + return MES_QUEUE_TYPE_COMPUTE; + else if (queue_type == AMDGPU_RING_TYPE_SDMA) + return MES_QUEUE_TYPE_SDMA; + else + BUG(); + return -1; +} + +static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, + struct mes_add_queue_input *input) +{ + struct amdgpu_device *adev = mes->adev; + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.process_id = input->process_id; + mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; + mes_add_queue_pkt.process_va_start = input->process_va_start; + mes_add_queue_pkt.process_va_end = input->process_va_end; + mes_add_queue_pkt.process_quantum = input->process_quantum; + 
mes_add_queue_pkt.process_context_addr = input->process_context_addr; + mes_add_queue_pkt.gang_quantum = input->gang_quantum; + mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; + mes_add_queue_pkt.inprocess_gang_priority = + input->inprocess_gang_priority; + mes_add_queue_pkt.gang_global_priority_level = + input->gang_global_priority_level; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + + mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; + + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.paging = input->paging; + mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl; + mes_add_queue_pkt.gws_base = input->gws_base; + mes_add_queue_pkt.gws_size = input->gws_size; + mes_add_queue_pkt.trap_handler_addr = input->tba_addr; + mes_add_queue_pkt.tma_addr = input->tma_addr; + mes_add_queue_pkt.trap_en = input->trap_en; + mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; + mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + +static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, + struct mes_remove_queue_input *input) +{ + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + + mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input) +{ + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.pipe_id = input->pipe_id; + mes_add_queue_pkt.queue_id = input->queue_id; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.map_legacy_kq = 1; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + +static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, + struct mes_unmap_legacy_queue_input *input) +{ + union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + + memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); + +
mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; + mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_remove_queue_pkt.gang_context_addr = 0; + + mes_remove_queue_pkt.pipe_id = input->pipe_id; + mes_remove_queue_pkt.queue_id = input->queue_id; + + if (input->action == PREEMPT_QUEUES_NO_UNMAP) { + mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; + mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; + mes_remove_queue_pkt.tf_data = + lower_32_bits(input->trail_fence_data); + } else { + mes_remove_queue_pkt.unmap_legacy_queue = 1; + mes_remove_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + } + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + +static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes, + struct mes_suspend_gang_input *input) +{ + return 0; +} + +static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, + struct mes_resume_gang_input *input) +{ + return 0; +} + +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +{ + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); +} + +static int mes_v12_0_misc_op(struct amdgpu_mes *mes, + struct mes_misc_op_input *input) +{ + union MESAPI__MISC misc_pkt; + + memset(&misc_pkt, 0, sizeof(misc_pkt)); + + misc_pkt.header.type = MES_API_TYPE_SCHEDULER; + misc_pkt.header.opcode = MES_SCH_API_MISC; + misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + switch (input->op) { + case MES_MISC_OP_READ_REG: + misc_pkt.opcode = MESAPI_MISC__READ_REG; + misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; + misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; + break; + case MES_MISC_OP_WRITE_REG: + misc_pkt.opcode = MESAPI_MISC__WRITE_REG; + misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; + misc_pkt.write_reg.reg_value = input->write_reg.reg_value; + break; + case MES_MISC_OP_WRM_REG_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = 0; + break; + case MES_MISC_OP_WRM_REG_WR_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; + break; + case MES_MISC_OP_SET_SHADER_DEBUGGER: + misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; + misc_pkt.set_shader_debugger.process_context_addr = + input->set_shader_debugger.process_context_addr; + misc_pkt.set_shader_debugger.flags.u32all = + input->set_shader_debugger.flags.u32all; + misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = + 
input->set_shader_debugger.spi_gdbg_per_vmid_cntl; + memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, + input->set_shader_debugger.tcp_watch_cntl, + sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); + misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; + break; + default: + DRM_ERROR("unsupported misc op (%d) \n", input->op); + return -EINVAL; + } + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &misc_pkt, sizeof(misc_pkt), + offsetof(union MESAPI__MISC, api_status)); +} + +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +{ + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; + + memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); + + mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); +} + +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +{ + int i; + struct amdgpu_device *adev = mes->adev; + union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; + + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + + for (i = 0; i < 5; i++) { + mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; + mes_set_hw_res_pkt.mmhub_base[i] = + adev->reg_offset[MMHUB_HWIP][0][i]; + mes_set_hw_res_pkt.osssys_base[i] = + adev->reg_offset[OSSSYS_HWIP][0][i]; + } + + mes_set_hw_res_pkt.disable_reset = 1; + mes_set_hw_res_pkt.disable_mes_log = 1; + mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.enable_reg_active_poll = 1; + + /* + * Keep oversubscribe timer for sdma . When we have unmapped doorbell + * handling support, other queue will not use the oversubscribe timer. 
+ * handling mode - 0: disabled; 1: basic version; 2: basic+ version + */ + mes_set_hw_res_pkt.oversubscription_timer = 50; + mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; + + mes_set_hw_res_pkt.enable_mes_event_int_logging = 0; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); +} + +static void mes_v12_0_init_aggregated_doorbell(struct amdgpu_mes *mes) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data; + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1); + data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << + CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2); + data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << + CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3); + data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << + CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4); + data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << + CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5); + data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << + CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data); + + data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); +} + + +static void mes_v12_0_enable_unmapped_doorbell_handling( + struct amdgpu_mes *mes, bool enable) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL); + + /* + * The default PROC_LSB settng is 0xc which means doorbell + * addr[16:12] gives the doorbell page number. 
For kfd, each + * process will use 2 pages of doorbell, we need to change the + * setting to 0xd + */ + data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; + data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; + + data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; + + WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); +} + +static const struct amdgpu_mes_funcs mes_v12_0_funcs = { + .add_hw_queue = mes_v12_0_add_hw_queue, + .remove_hw_queue = mes_v12_0_remove_hw_queue, + .map_legacy_queue = mes_v12_0_map_legacy_queue, + .unmap_legacy_queue = mes_v12_0_unmap_legacy_queue, + .suspend_gang = mes_v12_0_suspend_gang, + .resume_gang = mes_v12_0_resume_gang, + .misc_op = mes_v12_0_misc_op, +}; + +static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, + enum admgpu_mes_pipe pipe) +{ + int r; + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.ucode_fw_obj[pipe], + &adev->mes.ucode_fw_gpu_addr[pipe], + (void **)&adev->mes.ucode_fw_ptr[pipe]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); + return r; + } + + memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]); + amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]); + + return 0; +} + +static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, + enum admgpu_mes_pipe pipe) +{ + int r; + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw[pipe]->data; + + fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + 64 * 1024, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.data_fw_obj[pipe], + &adev->mes.data_fw_gpu_addr[pipe], + (void **)&adev->mes.data_fw_ptr[pipe]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); + return r; + } + + memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]); + amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]); + + return 0; +} + +static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev, + enum admgpu_mes_pipe pipe) +{ + amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe], + &adev->mes.data_fw_gpu_addr[pipe], + (void **)&adev->mes.data_fw_ptr[pipe]); + + amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe], + &adev->mes.ucode_fw_gpu_addr[pipe], + (void **)&adev->mes.ucode_fw_ptr[pipe]); +} + +static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) +{ + uint64_t ucode_addr; + uint32_t pipe, data = 0; + + if (enable) { + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, + (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 
1 : 0); + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); + + mutex_lock(&adev->srbm_mutex); + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && + pipe == AMDGPU_MES_KIQ_PIPE) + continue; + + soc21_grbm_select(adev, 3, pipe, 0, 0); + + ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; + WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START, + lower_32_bits(ucode_addr)); + WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI, + upper_32_bits(ucode_addr)); + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + /* unhalt MES and activate pipe0 */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, + (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); + + if (amdgpu_emu_mode) + msleep(100); + else if (adev->enable_uni_mes) + udelay(500); + else + udelay(50); + } else { + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, + MES_INVALIDATE_ICACHE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, + (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); + } +} + +static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) +{ + uint64_t ucode_addr; + int pipe; + + mes_v12_0_enable(adev, false); + + mutex_lock(&adev->srbm_mutex); + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && + pipe == AMDGPU_MES_KIQ_PIPE) + continue; + + /* me=3, queue=0 */ + soc21_grbm_select(adev, 3, pipe, 0, 0); + + /* set ucode start address */ + ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; + WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START, + lower_32_bits(ucode_addr)); + WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI, + upper_32_bits(ucode_addr)); + + soc21_grbm_select(adev, 0, 0, 0, 0); + } + mutex_unlock(&adev->srbm_mutex); +} + +/* This function is for backdoor MES firmware */ +static int mes_v12_0_load_microcode(struct amdgpu_device *adev, + enum admgpu_mes_pipe pipe, bool prime_icache) +{ + int r; + uint32_t data; + + mes_v12_0_enable(adev, false); + + if (!adev->mes.fw[pipe]) + return -EINVAL; + + r = mes_v12_0_allocate_ucode_buffer(adev, pipe); + if (r) + return r; + + r = mes_v12_0_allocate_ucode_data_buffer(adev, pipe); + if (r) { + mes_v12_0_free_ucode_buffers(adev, pipe); + return r; + } + + mutex_lock(&adev->srbm_mutex); + /* me=3, pipe=0, queue=0 */ + soc21_grbm_select(adev, 3, pipe, 0, 0); + + WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0); + + /* set ucode fimrware address */ + WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO, + lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe])); + WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI, + upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe])); + + /* set ucode instruction cache boundary to 2M-1 */ + WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF); + + /* set ucode data firmware address */ + WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO, + lower_32_bits(adev->mes.data_fw_gpu_addr[pipe])); + WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI, + upper_32_bits(adev->mes.data_fw_gpu_addr[pipe])); + + /* Set data cache boundary CP_MES_MDBOUND_LO */ + WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF); 
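+
+	/*
+	 * Annotation (editor's note, not part of the original patch): the
+	 * MIBOUND/MDBOUND writes bound the cached ucode windows: 0x1FFFFF
+	 * above works out to 2 MiB - 1 for the instruction cache, and
+	 * 0x7FFFF here to 512 KiB - 1 for the ucode data region.
+	 */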
+ + if (prime_icache) { + /* invalidate ICACHE */ + data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + + /* prime the ICACHE. */ + data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + return 0; +} + +static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev, + enum admgpu_mes_pipe pipe) +{ + int r; + u32 *eop; + + r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.eop_gpu_obj[pipe], + &adev->mes.eop_gpu_addr[pipe], + (void **)&eop); + if (r) { + dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); + return r; + } + + memset(eop, 0, + adev->mes.eop_gpu_obj[pipe]->tbo.base.size); + + amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]); + amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]); + + return 0; +} + +static int mes_v12_0_mqd_init(struct amdgpu_ring *ring) +{ + struct v12_compute_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000007; + + eop_base_addr = ring->eop_gpu_addr >> 8; + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(MES_EOP_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + mqd->cp_hqd_eop_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = regCP_MQD_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = ring->rptr_gpu_addr; + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = ring->wptr_gpu_addr; + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + tmp = 
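REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+
+	/*
+	 * Worked example (editor's annotation, not part of the original
+	 * patch): QUEUE_SIZE above encodes a ring of 2^(QUEUE_SIZE + 1)
+	 * dwords, the same scheme as EOP_SIZE. For a 4 KiB ring:
+	 * 4096 / 4 = 1024 dwords, order_base_2(1024) - 1 = 9, and
+	 * 2^(9 + 1) = 1024 dwords checks out.
+	 */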
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+	mqd->cp_hqd_pq_control = tmp;
+
+	/* enable doorbell */
+	tmp = 0;
+	if (ring->use_doorbell) {
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_OFFSET, ring->doorbell_index);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_EN, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_SOURCE, 0);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_HIT, 0);
+	} else {
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_EN, 0);
+	}
+	mqd->cp_hqd_pq_doorbell_control = tmp;
+
+	mqd->cp_hqd_vmid = 0;
+	/* activate the queue */
+	mqd->cp_hqd_active = 1;
+
+	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+			    PRELOAD_SIZE, 0x55);
+	mqd->cp_hqd_persistent_state = tmp;
+
+	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
+	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+	/*
+	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
+	 * doorbell handling. This is a reserved CP internal register that
+	 * cannot be accessed by others.
+	 */
+	mqd->reserved_184 = BIT(15);
+
+	return 0;
+}
+
+static void mes_v12_0_queue_init_register(struct amdgpu_ring *ring)
+{
+	struct v12_compute_mqd *mqd = ring->mqd_ptr;
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t data = 0;
+
+	mutex_lock(&adev->srbm_mutex);
+	soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
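+
+	/*
+	 * Annotation (editor's note, not part of the original patch): MES
+	 * runs on micro engine 3 (see the "me=3" comments elsewhere in this
+	 * file), so the HQD register writes below are bracketed by a GRBM
+	 * select of me 3 on the ring's pipe and a restore of the default
+	 * (0, 0, 0, 0) selection at the end.
+	 */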
+
+	/* set CP_HQD_VMID.VMID = 0 */
+	data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
+	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+	WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
+
+	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+	data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+			     DOORBELL_EN, 0);
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+	/* set CP_MQD_CONTROL.VMID=0 */
+	data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, data);
+
+	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+		     mqd->cp_hqd_pq_rptr_report_addr_lo);
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+		     mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+	/* set CP_HQD_PQ_CONTROL */
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+		     mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+	/* set CP_HQD_PQ_DOORBELL_CONTROL */
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+		     mqd->cp_hqd_pq_doorbell_control);
+
+	/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x55 */
+	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+	/* set CP_HQD_ACTIVE.ACTIVE=1 */
+	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+	int r;
+
+	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+		return -EINVAL;
+
+	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		return r;
+	}
+
+	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+
+	r = amdgpu_ring_test_ring(kiq_ring);
+	if (r) {
+		DRM_ERROR("KIQ enable failed\n");
+		kiq_ring->sched.ready = false;
+	}
+	return r;
+}
+
+static int mes_v12_0_queue_init(struct amdgpu_device *adev,
+				enum admgpu_mes_pipe pipe)
+{
+	struct amdgpu_ring *ring;
+	int r;
+
+	if (pipe == AMDGPU_MES_KIQ_PIPE)
+		ring = &adev->gfx.kiq[0].ring;
+	else if (pipe == AMDGPU_MES_SCHED_PIPE)
+		ring = &adev->mes.ring;
+	else
+		BUG();
+
+	if ((pipe == AMDGPU_MES_SCHED_PIPE) &&
+	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
+		*(ring->wptr_cpu_addr) = 0;
+		*(ring->rptr_cpu_addr) = 0;
+		amdgpu_ring_clear_ring(ring);
+	}
+
+	r = mes_v12_0_mqd_init(ring);
+	if (r)
+		return r;
+
+	if (pipe == AMDGPU_MES_SCHED_PIPE) {
+		if (adev->enable_uni_mes) {
+			mes_v12_0_queue_init_register(ring);
+		} else {
+			r = mes_v12_0_kiq_enable_queue(adev);
+			if (r)
+				return r;
+		}
+	} else {
+		mes_v12_0_queue_init_register(ring);
+	}
+
+	/* get MES scheduler/KIQ versions */
+	mutex_lock(&adev->srbm_mutex);
+	soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+	if (pipe == AMDGPU_MES_SCHED_PIPE)
+		adev->mes.sched_version = RREG32_SOC15(GC, 0,
regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + return 0; +} + +static int mes_v12_0_ring_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + + ring = &adev->mes.ring; + + ring->funcs = &mes_v12_0_ring_funcs; + + ring->me = 3; + ring->pipe = 0; + ring->queue = 0; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->no_scheduler = true; + sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); +} + +static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + + spin_lock_init(&adev->gfx.kiq[0].ring_lock); + + ring = &adev->gfx.kiq[0].ring; + + ring->me = 3; + ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->queue = 0; + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE]; + ring->no_scheduler = true; + sprintf(ring->name, "mes_kiq_%d.%d.%d", + ring->me, ring->pipe, ring->queue); + + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); +} + +static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, + enum admgpu_mes_pipe pipe) +{ + int r, mqd_size = sizeof(struct v12_compute_mqd); + struct amdgpu_ring *ring; + + if (pipe == AMDGPU_MES_KIQ_PIPE) + ring = &adev->gfx.kiq[0].ring; + else if (pipe == AMDGPU_MES_SCHED_PIPE) + ring = &adev->mes.ring; + else + BUG(); + + if (ring->mqd_obj) + return 0; + + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + memset(ring->mqd_ptr, 0, mqd_size); + + /* prepare MQD backup */ + adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->mes.mqd_backup[pipe]) + dev_warn(adev->dev, + "no memory to create MQD backup for ring %s\n", + ring->name); + + return 0; +} + +static int mes_v12_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int pipe, r; + + adev->mes.funcs = &mes_v12_0_funcs; + adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; + adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + + r = amdgpu_mes_init(adev); + if (r) + return r; + + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) + continue; + + r = mes_v12_0_allocate_eop_buf(adev, pipe); + if (r) + return r; + + r = mes_v12_0_mqd_sw_init(adev, pipe); + if (r) + return r; + } + + if (adev->enable_mes_kiq) { + r = mes_v12_0_kiq_ring_init(adev); + if (r) + return r; + } + + r = mes_v12_0_ring_init(adev); + if (r) + return r; + + return 0; +} + +static int mes_v12_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int pipe; + + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + kfree(adev->mes.mqd_backup[pipe]); + + amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe], + 
&adev->mes.eop_gpu_addr[pipe], + NULL); + amdgpu_ucode_release(&adev->mes.fw[pipe]); + } + + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + + amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, + &adev->mes.ring.mqd_gpu_addr, + &adev->mes.ring.mqd_ptr); + + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + amdgpu_ring_fini(&adev->mes.ring); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); + mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE); + } + + amdgpu_mes_fini(adev); + return 0; +} + +static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) +{ + uint32_t data; + int i; + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + } + data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 1); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + adev->mes.ring.sched.ready = false; +} + +static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); +} + +static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) +{ + int r = 0; + + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); + + if (adev->enable_uni_mes) + return mes_v12_0_hw_init(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + + r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false); + if (r) { + DRM_ERROR("failed to load MES fw, r=%d\n", r); + return r; + } + + r = mes_v12_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true); + if (r) { + DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); + return r; + } + + mes_v12_0_set_ucode_start_addr(adev); + + } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + mes_v12_0_set_ucode_start_addr(adev); + + mes_v12_0_enable(adev, true); + + r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + r = mes_v12_0_hw_init(adev); + if (r) + goto failure; + + return r; + +failure: + mes_v12_0_hw_fini(adev); + return r; +} + +static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) +{ + if (adev->mes.ring.sched.ready) { + mes_v12_0_kiq_dequeue_sched(adev); + adev->mes.ring.sched.ready = false; + } + + mes_v12_0_enable(adev, false); + + return 0; +} + +static int mes_v12_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->mes.ring.sched.ready) + goto out; + + if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if 
(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+			r = mes_v12_0_load_microcode(adev,
+					AMDGPU_MES_SCHED_PIPE, true);
+			if (r) {
+				DRM_ERROR("failed to load MES fw, r=%d\n", r);
+				return r;
+			}
+
+			mes_v12_0_set_ucode_start_addr(adev);
+
+		} else if (adev->firmware.load_type ==
+			   AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+
+			mes_v12_0_set_ucode_start_addr(adev);
+		}
+
+		mes_v12_0_enable(adev, true);
+	}
+
+	r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
+	if (r)
+		goto failure;
+
+	r = mes_v12_0_set_hw_resources(&adev->mes);
+	if (r)
+		goto failure;
+
+	if (adev->enable_uni_mes)
+		mes_v12_0_set_hw_resources_1(&adev->mes);
+
+	mes_v12_0_init_aggregated_doorbell(&adev->mes);
+
+	/* Enable the MES to handle doorbell ring on unmapped queue */
+	mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
+
+	r = mes_v12_0_query_sched_status(&adev->mes);
+	if (r) {
+		DRM_ERROR("MES is busy\n");
+		goto failure;
+	}
+
+out:
+	/*
+	 * Disable KIQ ring usage from the driver once MES is enabled.
+	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+	 * with MES enabled.
+	 */
+	adev->gfx.kiq[0].ring.sched.ready = false;
+	adev->mes.ring.sched.ready = true;
+
+	return 0;
+
+failure:
+	mes_v12_0_hw_fini(adev);
+	return r;
+}
+
+static int mes_v12_0_hw_fini(void *handle)
+{
+	return 0;
+}
+
+static int mes_v12_0_suspend(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = amdgpu_mes_suspend(adev);
+	if (r)
+		return r;
+
+	return mes_v12_0_hw_fini(adev);
+}
+
+static int mes_v12_0_resume(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = mes_v12_0_hw_init(adev);
+	if (r)
+		return r;
+
+	return amdgpu_mes_resume(adev);
+}
+
+static int mes_v12_0_early_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int pipe, r;
+
+	if (adev->enable_uni_mes) {
+		r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE);
+		if (!r)
+			return 0;
+
+		adev->enable_uni_mes = false;
+	}
+
+	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+		if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+			continue;
+		r = amdgpu_mes_init_microcode(adev, pipe);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int mes_v12_0_late_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	/* this is only intended for the mes_self_test case, not for s0ix and reset */
+	if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend)
+		amdgpu_mes_self_test(adev);
+
+	return 0;
+}
+
+static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
+	.name = "mes_v12_0",
+	.early_init = mes_v12_0_early_init,
+	.late_init = mes_v12_0_late_init,
+	.sw_init = mes_v12_0_sw_init,
+	.sw_fini = mes_v12_0_sw_fini,
+	.hw_init = mes_v12_0_hw_init,
+	.hw_fini = mes_v12_0_hw_fini,
+	.suspend = mes_v12_0_suspend,
+	.resume = mes_v12_0_resume,
+};
+
+const struct amdgpu_ip_block_version mes_v12_0_ip_block = {
+	.type = AMD_IP_BLOCK_TYPE_MES,
+	.major = 12,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &mes_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
new file mode 100644
index 000000000000..ac3740f353aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MES_V12_0_H__ +#define __MES_V12_0_H__ + +extern const struct amdgpu_ip_block_version mes_v12_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 92432cd2c0c7..9689e2b5d4e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -544,7 +544,7 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev, static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - int data, data1; + u32 data, data1; if (amdgpu_sriov_vf(adev)) *flags = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index c0fc44cdd658..621761a17ac7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,6 +559,22 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } +static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst) +{ + u32 fed, status; + + status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + if (!amdgpu_sriov_vf(adev)) { + /* clear page fault status and address */ + WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, + regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); + } + + return fed; +} + const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -568,6 +584,7 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, + .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { @@ -706,28 +723,32 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, }; -static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_error_type type, - struct aca_bank_report *report, void *data) +static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { - u64 status, misc0; + struct aca_bank_info info; + u64 misc0; int ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - if 
((type == ACA_ERROR_TYPE_UE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || - (type == ACA_ERROR_TYPE_CE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { - - ret = aca_bank_info_decode(bank, &report->info); - if (ret) - return ret; - - misc0 = bank->regs[ACA_REG_IDX_MISC0]; - report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; } - return 0; + return ret; } /* reference to smu driver if header file */ @@ -741,7 +762,7 @@ static int mmhub_v1_8_err_codes[] = { }; static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { u32 instlo; @@ -760,7 +781,7 @@ static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = { - .aca_bank_generate_report = mmhub_v1_8_aca_bank_generate_report, + .aca_bank_parser = mmhub_v1_8_aca_bank_parser, .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 02fd45261399..a0cc8e218ca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -671,7 +671,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - int data, data1; + u32 data, data1; if (amdgpu_sriov_vf(adev)) *flags = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index 238ea40c2450..b4ce3375d3fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -33,6 +33,10 @@ #define regMMVM_L2_CNTL3_DEFAULT 0x80100007 #define regMMVM_L2_CNTL4_DEFAULT 0x000000c1 #define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0 +#define regDAGB0_L1TLB_REG_RW_3_3 0x00a4 +#define regDAGB0_L1TLB_REG_RW_3_3_BASE_IDX 1 +#define regDAGB1_L1TLB_REG_RW_3_3 0x0163 +#define regDAGB1_L1TLB_REG_RW_3_3_BASE_IDX 1 static const char *mmhub_client_ids_v3_3[][2] = { [0][0] = "VMC", @@ -359,6 +363,49 @@ static void mmhub_v3_3_program_invalidation(struct amdgpu_device *adev) } } +static void mmhub_v3_3_init_saw_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + uint32_t tmp; + + /* Program page table base, gart start, gart end */ + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(pt_base >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(pt_base >> 12)); + + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, 
regMMVM_L2_SAW_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL, tmp); + + /* Disable all contexts except context 0 */ + tmp = 0xfffe; + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXTS_DISABLE, tmp); + + /* Program saw cntl4 */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4); + tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_SNOOP, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_SNOOP, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4, tmp); +} + +static void mmhub_v3_3_enable_tls(struct amdgpu_device *adev) +{ + WREG32_SOC15(MMHUB, 0, regDAGB0_L1TLB_REG_RW_3_3, 0); + WREG32_SOC15(MMHUB, 0, regDAGB1_L1TLB_REG_RW_3_3, 3); +} + static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev) { /* GART Enable. */ @@ -372,6 +419,12 @@ static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev) mmhub_v3_3_setup_vmid_config(adev); mmhub_v3_3_program_invalidation(adev); + /* standalone alone walker init */ + mmhub_v3_3_init_saw_regs(adev); + + /* enable mmhub tls */ + mmhub_v3_3_enable_tls(adev); + return 0; } @@ -560,7 +613,7 @@ static int mmhub_v3_3_set_clockgating(struct amdgpu_device *adev, static void mmhub_v3_3_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - int data; + u32 data; if (amdgpu_sriov_vf(adev)) *flags = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c new file mode 100644 index 000000000000..5bbaa2b2caab --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -0,0 +1,654 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "mmhub_v4_1_0.h" + +#include "mmhub/mmhub_4_1_0_offset.h" +#include "mmhub/mmhub_4_1_0_sh_mask.h" + +#include "soc15_common.h" +#include "soc24_enum.h" + +#define regMMVM_L2_CNTL3_DEFAULT 0x80100007 +#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0 + +static const char *mmhub_client_ids_v4_1_0[][2] = { + [0][0] = "VMC", + [4][0] = "DCEDMC", + [5][0] = "DCEVGA", + [6][0] = "MP0", + [7][0] = "MP1", + [8][0] = "MPIO", + [16][0] = "HDP", + [17][0] = "LSDMA", + [18][0] = "JPEG", + [19][0] = "VCNU0", + [21][0] = "VSCH", + [22][0] = "VCNU1", + [23][0] = "VCN1", + [32+20][0] = "VCN0", + [2][1] = "DBGUNBIO", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [5][1] = "DCEVGA", + [6][1] = "MP0", + [7][1] = "MP1", + [8][1] = "MPIO", + [10][1] = "DBGU0", + [11][1] = "DBGU1", + [12][1] = "DBGU2", + [13][1] = "DBGU3", + [14][1] = "XDP", + [15][1] = "OSSSYS", + [16][1] = "HDP", + [17][1] = "LSDMA", + [18][1] = "JPEG", + [19][1] = "VCNU0", + [20][1] = "VCN0", + [21][1] = "VSCH", + [22][1] = "VCNU1", + [23][1] = "VCN1", +}; + +static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + uint32_t cid, rw; + const char *mmhub_cid = NULL; + + cid = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID); + rw = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, RW); + + dev_err(adev->dev, + "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", + status); + switch (adev->ip_versions[MMHUB_HWIP][0]) { + case IP_VERSION(4, 1, 0): + mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; + break; + default: + mmhub_cid = NULL; + break; + } + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + mmhub_cid ? 
mmhub_cid : "unknown", cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%x\n", rw); +} + +static void mmhub_v4_1_0_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); +} + +static void mmhub_v4_1_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v4_1_0_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v4_1_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; + + /* + * the new L1 policy will block SRIOV guest from writing + * these regs, and they will be programed at host. + * so skip programing these regs. + */ + if (amdgpu_sriov_vf(adev)) + return; + + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v4_1_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v4_1_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp); + + tmp = regMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp); + + tmp = regMMVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp); + + tmp = regMMVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); +} + +static void mmhub_v4_1_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 
0); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp); +} + +static void mmhub_v4_1_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} + +static void mmhub_v4_1_0_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; +} + +static void mmhub_v4_1_0_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int mmhub_v4_1_0_gart_enable(struct amdgpu_device *adev) +{ + /* GART Enable. 
*/ + mmhub_v4_1_0_init_gart_aperture_regs(adev); + mmhub_v4_1_0_init_system_aperture_regs(adev); + mmhub_v4_1_0_init_tlb_regs(adev); + mmhub_v4_1_0_init_cache_regs(adev); + + mmhub_v4_1_0_enable_system_domain(adev); + mmhub_v4_1_0_disable_identity_aperture(adev); + mmhub_v4_1_0_setup_vmid_config(adev); + mmhub_v4_1_0_program_invalidation(adev); + + return 0; +} + +static void mmhub_v4_1_0_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v4_1_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void +mmhub_v4_1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs mmhub_v4_1_0_vmhub_funcs = { + .print_l2_protection_fault_status = mmhub_v4_1_0_print_l2_protection_fault_status, + .get_invalidate_req = mmhub_v4_1_0_get_invalidate_req, +}; + +static void mmhub_v4_1_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + 
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS_LO32); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ - + regMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vm_l2_bank_select_reserved_cid2 = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2); + + hub->vm_contexts_disable = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE); + + hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs; +} + +static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base; + + base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE); + + base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +static u64 mmhub_v4_1_0_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24; +} + +static void +mmhub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ +#if 0 + uint32_t def, data; +#endif + uint32_t def1, data1, def2 = 0, data2 = 0; +#if 0 + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); +#endif + def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2); + def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2); + + if (enable) { +#if 0 + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; +#endif + data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + + data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + } else { +#if 0 + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; +#endif + data1 |= (DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + + data2 |= (DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK); + } + +#if 0 + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); +#endif + if (def1 != data1) + WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1); + + if (def2 != data2) + WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2); +} + +static void 
+mmhub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ +#if 0 + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); +#endif +} + +static int mmhub_v4_1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG) + mmhub_v4_1_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + + if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) + mmhub_v4_1_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + + return 0; +} + +static void mmhub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ +#if 0 + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +#endif +} + +const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs = { + .init = mmhub_v4_1_0_init, + .get_fb_location = mmhub_v4_1_0_get_fb_location, + .get_mc_fb_offset = mmhub_v4_1_0_get_mc_fb_offset, + .gart_enable = mmhub_v4_1_0_gart_enable, + .set_fault_enable_default = mmhub_v4_1_0_set_fault_enable_default, + .gart_disable = mmhub_v4_1_0_gart_disable, + .set_clockgating = mmhub_v4_1_0_set_clockgating, + .get_clockgating = mmhub_v4_1_0_get_clockgating, + .setup_vm_pt_regs = mmhub_v4_1_0_setup_vm_pt_regs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h new file mode 100644 index 000000000000..3902d653353c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h @@ -0,0 +1,28 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __MMHUB_V4_1_0_H__ +#define __MMHUB_V4_1_0_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 1b7da4aff2b8..ff1b58e44689 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -657,7 +657,7 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - int data, data1; + u32 data, data1; if (amdgpu_sriov_vf(adev)) *flags = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index a2bd2c3b1ef9..f5411b798e11 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -93,7 +93,7 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev) timeout -= 5; } while (timeout > 1); - pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT); + dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT); return -ETIME; } @@ -111,7 +111,7 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) timeout -= 10; } while (timeout > 1); - pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); + dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r); return -ETIME; } @@ -132,7 +132,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_ai_mailbox_set_valid(adev, false); trn = xgpu_ai_peek_ack(adev); if (trn) { - pr_err("trn=%x ACK should not assert! wait again !\n", trn); + dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn); msleep(1); } } while(trn); @@ -155,7 +155,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, /* start to poll ack */ r = xgpu_ai_poll_ack(adev); if (r) - pr_err("Doesn't get ack from pf, continue\n"); + dev_err(adev->dev, "Doesn't get ack from pf, continue\n"); xgpu_ai_mailbox_set_valid(adev, false); } @@ -173,7 +173,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, req == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); if (r) { - pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); + dev_err(adev->dev, "Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); return r; } /* Retrieve checksum from mailbox2 */ @@ -231,7 +231,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - DRM_DEBUG("get ack intr and do nothing.\n"); + dev_dbg(adev->dev, "get ack intr and do nothing.\n"); return 0; } @@ -249,36 +249,33 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev, return 0; } -static void xgpu_ai_mailbox_flr_work(struct work_struct *work) +static void xgpu_ai_ready_to_reset(struct amdgpu_device *adev) { - struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); - struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); - int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT; - - /* block amdgpu_gpu_recover till msg FLR COMPLETE received, - * otherwise the mailbox msg will be ruined/reseted by - * the VF FLR. 
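
The mxgpu_ai rework above (mirrored for mxgpu_nv below) splits the FLR handshake out of flr_work: the work item no longer takes the reset-domain lock and polls for IDH_FLR_NOTIFICATION_CMPL itself, but simply schedules recovery with AMDGPU_HOST_FLR set, while the new ready_to_reset/wait_reset virt ops let the common reset path drive the handshake at the right point. A simplified, hypothetical sketch of how the split hooks compose (this helper is illustrative, not the driver's actual reset sequence):

static int example_host_flr_handshake(struct amdgpu_device *adev)
{
	const struct amdgpu_virt_ops *ops = adev->virt.ops;

	if (ops->ready_to_reset)
		ops->ready_to_reset(adev);    /* e.g. send IDH_READY_TO_RESET to the PF */
	if (ops->wait_reset)
		return ops->wait_reset(adev); /* poll for IDH_FLR_NOTIFICATION_CMPL */
	return 0;
}
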
- */ - if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0) - return; - - down_write(&adev->reset_domain->sem); - - amdgpu_virt_fini_data_exchange(adev); - xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); +} +static int xgpu_ai_wait_reset(struct amdgpu_device *adev) +{ + int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT; do { - if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) - goto flr_done; - + if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) { + dev_dbg(adev->dev, "Got AI IDH_FLR_NOTIFICATION_CMPL after %d ms\n", AI_MAILBOX_POLL_FLR_TIMEDOUT - timeout); + return 0; + } msleep(10); timeout -= 10; } while (timeout > 1); -flr_done: - atomic_set(&adev->reset_domain->in_gpu_reset, 0); - up_write(&adev->reset_domain->sem); + dev_dbg(adev->dev, "waiting AI IDH_FLR_NOTIFICATION_CMPL timeout\n"); + return -ETIME; +} + +static void xgpu_ai_mailbox_flr_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + amdgpu_virt_fini_data_exchange(adev); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) @@ -290,6 +287,7 @@ flr_done: reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + set_bit(AMDGPU_HOST_FLR, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } @@ -317,7 +315,7 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, switch (event) { case IDH_FLR_NOTIFICATION: - if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) + if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, &adev->virt.flr_work), "Failed to queue work! 
at %s", @@ -410,12 +408,21 @@ static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev, xgpu_ai_send_access_requests(adev, IDH_RAS_POISON); } +static bool xgpu_ai_rcvd_ras_intr(struct amdgpu_device *adev) +{ + enum idh_event msg = xgpu_ai_mailbox_peek_msg(adev); + + return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF); +} + const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, .reset_gpu = xgpu_ai_request_reset, - .wait_reset = NULL, + .ready_to_reset = xgpu_ai_ready_to_reset, + .wait_reset = xgpu_ai_wait_reset, .trans_msg = xgpu_ai_mailbox_trans_msg, .req_init_data = xgpu_ai_request_init_data, .ras_poison_handler = xgpu_ai_ras_poison_handler, + .rcvd_ras_intr = xgpu_ai_rcvd_ras_intr, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index c520b2fabfb9..ed57cbc150af 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -51,7 +51,9 @@ enum idh_event { IDH_FAIL, IDH_QUERY_ALIVE, IDH_REQ_GPU_INIT_DATA_READY, - + IDH_RAS_POISON_READY, + IDH_PF_SOFT_FLR_NOTIFICATION, + IDH_RAS_ERROR_DETECTED, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 77f5b55decf9..f47bd7ada4d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -91,7 +91,7 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev) timeout -= 5; } while (timeout > 1); - pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT); + dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", NV_MAILBOX_POLL_ACK_TIMEDOUT); return -ETIME; } @@ -106,13 +106,16 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) do { r = xgpu_nv_mailbox_rcv_msg(adev, event); - if (!r) + if (!r) { + dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now); return 0; + } msleep(10); now = (uint64_t)ktime_to_ms(ktime_get()); } while (timeout > now); + dev_dbg(adev->dev, "nv_poll_msg timed out\n"); return -ETIME; } @@ -133,11 +136,12 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_nv_mailbox_set_valid(adev, false); trn = xgpu_nv_peek_ack(adev); if (trn) { - pr_err("trn=%x ACK should not assert! wait again !\n", trn); + dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! 
wait again !\n", trn); msleep(1); } } while (trn); + dev_dbg(adev->dev, "trans_msg req = 0x%x, data1 = 0x%x\n", req, data1); WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req); WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1); WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2); @@ -147,7 +151,7 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, /* start to poll ack */ r = xgpu_nv_poll_ack(adev); if (r) - pr_err("Doesn't get ack from pf, continue\n"); + dev_err(adev->dev, "Doesn't get ack from pf, continue\n"); xgpu_nv_mailbox_set_valid(adev, false); } @@ -181,11 +185,11 @@ send_request: if (event != -1) { r = xgpu_nv_poll_msg(adev, event); if (r) { - if (retry++ < 2) + if (retry++ < 5) goto send_request; if (req != IDH_REQ_GPU_INIT_DATA) { - pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); + dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r); return r; } else /* host doesn't support REQ_GPU_INIT_DATA handshake */ adev->virt.req_init_data_ver = 0; @@ -261,7 +265,7 @@ static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - DRM_DEBUG("get ack intr and do nothing.\n"); + dev_dbg(adev->dev, "get ack intr and do nothing.\n"); return 0; } @@ -282,36 +286,33 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev, return 0; } -static void xgpu_nv_mailbox_flr_work(struct work_struct *work) +static void xgpu_nv_ready_to_reset(struct amdgpu_device *adev) { - struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); - struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); - int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; - - /* block amdgpu_gpu_recover till msg FLR COMPLETE received, - * otherwise the mailbox msg will be ruined/reseted by - * the VF FLR. 
- */ - if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0) - return; - - down_write(&adev->reset_domain->sem); - - amdgpu_virt_fini_data_exchange(adev); - xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); +} +static int xgpu_nv_wait_reset(struct amdgpu_device *adev) +{ + int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; do { - if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) - goto flr_done; - + if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) { + dev_dbg(adev->dev, "Got NV IDH_FLR_NOTIFICATION_CMPL after %d ms\n", NV_MAILBOX_POLL_FLR_TIMEDOUT - timeout); + return 0; + } msleep(10); timeout -= 10; } while (timeout > 1); -flr_done: - atomic_set(&adev->reset_domain->in_gpu_reset, 0); - up_write(&adev->reset_domain->sem); + dev_dbg(adev->dev, "waiting NV IDH_FLR_NOTIFICATION_CMPL timeout\n"); + return -ETIME; +} + +static void xgpu_nv_mailbox_flr_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + amdgpu_virt_fini_data_exchange(adev); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) @@ -326,6 +327,7 @@ flr_done: reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + set_bit(AMDGPU_HOST_FLR, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } @@ -356,7 +358,7 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, switch (event) { case IDH_FLR_NOTIFICATION: - if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) + if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, &adev->virt.flr_work), "Failed to queue work! 
at %s", @@ -444,16 +446,24 @@ static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev, amdgpu_virt_fini_data_exchange(adev); xgpu_nv_send_access_requests_with_param(adev, IDH_RAS_POISON, block, 0, 0); - amdgpu_virt_init_data_exchange(adev); } } +static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev) +{ + enum idh_event msg = xgpu_nv_mailbox_peek_msg(adev); + + return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, .req_init_data = xgpu_nv_request_init_data, .reset_gpu = xgpu_nv_request_reset, - .wait_reset = NULL, + .ready_to_reset = xgpu_nv_ready_to_reset, + .wait_reset = xgpu_nv_wait_reset, .trans_msg = xgpu_nv_mailbox_trans_msg, .ras_poison_handler = xgpu_nv_ras_poison_handler, + .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 1e8fd90cab43..caf616a2c8a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -26,7 +26,7 @@ #define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 #define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000 -#define NV_MAILBOX_POLL_FLR_TIMEDOUT 5000 +#define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000 #define NV_MAILBOX_POLL_MSG_REP_MAX 11 enum idh_request { @@ -52,7 +52,8 @@ enum idh_event { IDH_QUERY_ALIVE, IDH_REQ_GPU_INIT_DATA_READY, IDH_RAS_POISON_READY, - + IDH_PF_SOFT_FLR_NOTIFICATION, + IDH_RAS_ERROR_DETECTED, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 59f53c743362..e1d63bed84bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -515,12 +515,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); - /* wait until RCV_MSG become 3 */ - if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { - pr_err("failed to receive FLR_CMPL\n"); - return; - } - /* Trigger recovery due to world switch failure */ if (amdgpu_device_should_recover_gpu(adev)) { struct amdgpu_reset_context reset_context; @@ -529,6 +523,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + set_bit(AMDGPU_HOST_FLR, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } @@ -560,7 +555,7 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev, r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); /* only handle FLR_NOTIFY now */ - if (!r && !amdgpu_in_reset(adev)) + if (!r) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, &adev->virt.flr_work), "Failed to queue work! 
at %s", diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 4178f4e5dad7..b281462093f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -713,6 +713,8 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { .set_clockgating_state = navi10_ih_set_clockgating_state, .set_powergating_state = navi10_ih_set_powergating_state, .get_clockgating_state = navi10_ih_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs navi10_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c index 96ed00ac81ac..39919e0892c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -315,6 +315,7 @@ static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev) static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; + u16 devctl2; def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); data = 0x35EB; @@ -328,13 +329,15 @@ static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); - def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2); + + if (adev->pdev->ltr_path == (devctl2 & PCI_EXP_DEVCTL2_LTR_EN)) + return; + if (adev->pdev->ltr_path) - data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN); else - data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; - if (def != data) - WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN); } #endif @@ -342,6 +345,7 @@ static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev) { #ifdef CONFIG_PCIEASPM uint32_t def, data; + u16 devctl2, ltr; def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; @@ -371,12 +375,17 @@ static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); - def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); - data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2); + data = def = devctl2; + data &= ~PCI_EXP_DEVCTL2_LTR_EN; if (def != data) - WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, (u16)data); - WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + ltr = pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_LTR); + + if (ltr) { + pci_write_config_dword(adev->pdev, ltr + PCI_LTR_MAX_SNOOP_LAT, 0x10011001); + } #if 0 /* regPSWUSP0_PCIE_LC_CNTL2 should be replace by PCIE_LC_CNTL2 or someone else ? 
*/ @@ -424,6 +433,20 @@ static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev) #endif } +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + +static void nbif_v6_3_1_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = { .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, @@ -446,6 +469,7 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = { .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, .get_rom_offset = nbif_v6_3_1_get_rom_offset, .program_aspm = nbif_v6_3_1_program_aspm, + .set_reg_remap = nbif_v6_3_1_set_reg_remap, }; @@ -492,4 +516,5 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { .init_registers = nbif_v6_3_1_init_registers, .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, .get_rom_offset = nbif_v6_3_1_get_rom_offset, + .set_reg_remap = nbif_v6_3_1_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index df218d5ca775..fa479dfa1ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -339,10 +339,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); - - if (amdgpu_sriov_vf(adev)) - adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1 @@ -553,6 +549,20 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev) } } +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + +static void nbio_v2_3_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -577,4 +587,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa, .apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa, .clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt, + .set_reg_remap = nbio_v2_3_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index a3622897e3fe..a54052dea8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -345,9 +345,6 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev) data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); } - if (amdgpu_sriov_vf(adev)) - adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, - 
regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev) @@ -475,6 +472,20 @@ static void nbio_v4_3_program_aspm(struct amdgpu_device *adev) #endif } +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + +static void nbio_v4_3_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbio_v4_3_funcs = { .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset, @@ -497,6 +508,7 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = { .remap_hdp_registers = nbio_v4_3_remap_hdp_registers, .get_rom_offset = nbio_v4_3_get_rom_offset, .program_aspm = nbio_v4_3_program_aspm, + .set_reg_remap = nbio_v4_3_set_reg_remap, }; @@ -541,6 +553,7 @@ const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs = { .init_registers = nbio_v4_3_init_registers, .remap_hdp_registers = nbio_v4_3_remap_hdp_registers, .get_rom_offset = nbio_v4_3_get_rom_offset, + .set_reg_remap = nbio_v4_3_set_reg_remap, }; static int nbio_v4_3_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 37615a77287b..34180c6070dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -276,10 +276,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_CI_CNTL, data); - - if (amdgpu_sriov_vf(adev)) - adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } #ifdef CONFIG_PCIEASPM @@ -394,6 +390,21 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) #endif } +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + +static void nbio_v6_1_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, @@ -412,5 +423,6 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .ih_control = nbio_v6_1_ih_control, .init_registers = nbio_v6_1_init_registers, .remap_hdp_registers = nbio_v6_1_remap_hdp_registers, - .program_aspm = nbio_v6_1_program_aspm, + .program_aspm = nbio_v6_1_program_aspm, + .set_reg_remap = nbio_v6_1_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index aa0326d00c72..b1b57dcc5a73 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -273,9 +273,20 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { static void nbio_v7_0_init_registers(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev)) +} + +#define MMIO_REG_HOLE_OFFSET 
(0x80000 - PAGE_SIZE) + +static void nbio_v7_0_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } } const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { @@ -297,4 +308,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .ih_control = nbio_v7_0_ih_control, .init_registers = nbio_v7_0_init_registers, .remap_hdp_registers = nbio_v7_0_remap_hdp_registers, + .set_reg_remap = nbio_v7_0_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 05020141c0ae..7a9adfda5814 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -352,6 +352,20 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + +static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { .get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset, @@ -374,4 +388,5 @@ const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { .ih_control = nbio_v7_11_ih_control, .init_registers = nbio_v7_11_init_registers, .remap_hdp_registers = nbio_v7_11_remap_hdp_registers, + .set_reg_remap = nbio_v7_11_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index e962821ae6a1..a766e2d90cd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -402,10 +402,21 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data); break; } +} + +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) - if (amdgpu_sriov_vf(adev)) - adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, - regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; +static void nbio_v7_2_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } } const struct amdgpu_nbio_funcs nbio_v7_2_funcs = { @@ -429,4 +440,5 @@ const struct amdgpu_nbio_funcs nbio_v7_2_funcs = { .ih_control = nbio_v7_2_ih_control, .init_registers = nbio_v7_2_init_registers, .remap_hdp_registers = nbio_v7_2_remap_hdp_registers, + .set_reg_remap = nbio_v7_2_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 19986ff6a48d..8d80df94bd8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -343,10 +343,6 @@ static void 
nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; - if (amdgpu_sriov_vf(adev)) - adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; - if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4) && !amdgpu_sriov_vf(adev)) { baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL); @@ -387,7 +383,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device else WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); - if (!ras->disable_ras_err_cnt_harvest) { + if (ras && !ras->disable_ras_err_cnt_harvest && obj) { /* * clear error status after ras_controller_intr * according to hw team and count ue number @@ -418,6 +414,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device /* ras_controller_int is dedicated for nbif ras error, * not the global interrupt for sync flood */ + amdgpu_ras_set_fed(adev, true); amdgpu_ras_reset_gpu(adev); } @@ -792,6 +789,21 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) #endif } +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + +static void nbio_v7_4_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, @@ -813,4 +825,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .init_registers = nbio_v7_4_init_registers, .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, .program_aspm = nbio_v7_4_program_aspm, + .set_reg_remap = nbio_v7_4_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index 4df1055e640a..fb37e354a9d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -324,6 +324,21 @@ static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + +static void nbio_v7_7_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset, @@ -345,4 +360,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, .remap_hdp_registers = nbio_v7_7_remap_hdp_registers, + .set_reg_remap = nbio_v7_7_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 40d1e209eab7..d1bd79bbae53 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -422,12 +422,6 @@ static void 
nbio_v7_9_init_registers(struct amdgpu_device *adev) u32 inst_mask; int i; - if (amdgpu_sriov_vf(adev)) - adev->rmmio_remap.reg_offset = - SOC15_REG_OFFSET( - NBIO, 0, - regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) - << 2; WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE, 0xff & ~(adev->gfx.xcc_mask)); @@ -475,6 +469,23 @@ static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) return (nak_r + nak_g); } +#define MMIO_REG_HOLE_OFFSET 0x1A000 + +static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) { + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + } else { + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET( + NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) + << 2; + adev->rmmio_remap.bus_addr = 0; + } +} + const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset, @@ -499,6 +510,7 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .init_registers = nbio_v7_9_init_registers, .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, + .set_reg_remap = nbio_v7_9_set_reg_remap, }; static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 4d7976b77767..4938e6b340e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -59,7 +59,6 @@ #include "vcn_v3_0.h" #include "jpeg_v3_0.h" #include "amdgpu_vkms.h" -#include "mes_v10_1.h" #include "mxgpu_nv.h" #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" @@ -110,7 +109,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -121,7 +120,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -199,7 +198,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 
4352, 0)}, }; @@ -637,13 +636,9 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = { static int nv_common_early_init(void *handle) { -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_sriov_vf(adev)) { - adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; - adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; - } + adev->nbio.funcs->set_reg_remap(adev); adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &amdgpu_device_indirect_rreg; @@ -1131,4 +1126,6 @@ static const struct amd_ip_funcs nv_common_ip_funcs = { .set_clockgating_state = nv_common_set_clockgating_state, .set_powergating_state = nv_common_set_powergating_state, .get_clockgating_state = nv_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 7566973ed8f5..37b5ddd6f13b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -464,8 +464,9 @@ struct psp_gfx_rb_frame #define PSP_ERR_UNKNOWN_COMMAND 0x00000100 enum tee_error_code { - TEE_SUCCESS = 0x00000000, - TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A, + TEE_SUCCESS = 0x00000000, + TEE_ERROR_CANCEL = 0xFFFF0002, + TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A, }; #endif /* _PSP_TEE_GFX_IF_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 0da50ea46eaf..1251ee38a676 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -51,10 +51,14 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_4_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -104,6 +108,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 4): err = psp_init_toc_microcode(psp, ucode_prefix); if (err) return err; @@ -115,6 +120,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 14): err = psp_init_sos_microcode(psp, ucode_prefix); if (err) return err; @@ -168,7 +174,8 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) int retry_loop, retry_cnt, ret; retry_cnt = - (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ? + ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ? 
PSP_VMBX_POLLING_LIMIT : 10; /* Wait for bootloader to signify that it is ready having bit 31 of @@ -193,7 +200,8 @@ static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) struct amdgpu_device *adev = psp->adev; int ret; - if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { ret = psp_v13_0_wait_for_vmbx_ready(psp); if (ret) amdgpu_ras_query_boot_status(adev, 4); @@ -787,7 +795,8 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) if (!con) return false; - if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) && + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) && (!(adev->flags & AMD_IS_APU))) { reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127); adev->ras_hw_enabled = (reg_data & GENMASK_ULL(23, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 78a95f8f370b..4d33c95a5116 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -32,7 +32,9 @@ #include "mp/mp_14_0_2_sh_mask.h" MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -66,6 +68,9 @@ static int psp_v14_0_init_microcode(struct psp_context *psp) err = psp_init_sos_microcode(psp, ucode_prefix); if (err) return err; + err = psp_init_ta_microcode(psp, ucode_prefix); + if (err) + return err; break; default: BUG(); @@ -149,7 +154,7 @@ static int psp_v14_0_bootloader_load_kdb(struct psp_context *psp) static int psp_v14_0_bootloader_load_spl(struct psp_context *psp) { - return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE); + return psp_v14_0_bootloader_load_component(psp, &psp->spl, PSP_BL__LOAD_TOS_SPL_TABLE); } static int psp_v14_0_bootloader_load_sysdrv(struct psp_context *psp) @@ -169,7 +174,8 @@ static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp) static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp) { - return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); + /* dbg_drv was renamed to had_drv in psp v14 */ + return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_HADDRV); } static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp) @@ -177,6 +183,10 @@ static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp) return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); } +static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->ipkeymgr_drv, PSP_BL__LOAD_IPKEYMGRDRV); +} static int psp_v14_0_bootloader_load_sos(struct psp_context *psp) { @@ -653,6 +663,7 @@ static const struct psp_funcs psp_v14_0_funcs = { .bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv, .bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv, + .bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv, .bootloader_load_sos = psp_v14_0_bootloader_load_sos, .ring_create = psp_v14_0_ring_create, .ring_stop = 
psp_v14_0_ring_stop, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 07e19caf2bc1..725392522267 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -127,7 +127,6 @@ static void sdma_v2_4_free_microcode(struct amdgpu_device *adev) static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -145,10 +144,11 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + "amdgpu/%s_sdma.bin", chip_name); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; @@ -169,7 +169,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) out: if (err) { - pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name); + pr_err("sdma_v2_4: Failed to load firmware \"%s_sdma%s.bin\"\n", + chip_name, i == 0 ? "" : "1"); for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ucode_release(&adev->sdma.instance[i].fw); } @@ -1113,6 +1114,8 @@ static const struct amd_ip_funcs sdma_v2_4_ip_funcs = { .soft_reset = sdma_v2_4_soft_reset, .set_clockgating_state = sdma_v2_4_set_clockgating_state, .set_powergating_state = sdma_v2_4_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { @@ -1176,7 +1179,7 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: unused + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). 
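
The sdma changes that follow widen the emit_copy_buffer() @tmz parameter from a bool into a u32 @copy_flags bitmask, so callers can pass several copy attributes through one argument; on engines that support it, AMDGPU_COPY_FLAGS_TMZ is then translated into the TMZ bit of the copy-packet header. A minimal sketch of the decode (the flag value below is a stand-in; only the name AMDGPU_COPY_FLAGS_TMZ comes from the diff):

#define EXAMPLE_COPY_FLAGS_TMZ  (1u << 0)   /* stand-in for AMDGPU_COPY_FLAGS_TMZ */

static unsigned int example_header_tmz(unsigned int copy_flags)
{
	/* one header bit per requested attribute */
	return (copy_flags & EXAMPLE_COPY_FLAGS_TMZ) ? 1u : 0u;
}
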
* Used by the amdgpu ttm implementation to move pages if @@ -1186,7 +1189,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 2ad615be4bb3..aa637541da58 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -267,7 +267,6 @@ static void sdma_v3_0_free_microcode(struct amdgpu_device *adev) static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -305,10 +304,11 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + "amdgpu/%s_sdma.bin", chip_name); else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; @@ -327,7 +327,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) } out: if (err) { - pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name); + pr_err("sdma_v3_0: Failed to load firmware \"%s_sdma%s.bin\"\n", + chip_name, i == 0 ? "" : "1"); for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ucode_release(&adev->sdma.instance[i].fw); } @@ -1553,6 +1554,8 @@ static const struct amd_ip_funcs sdma_v3_0_ip_funcs = { .set_clockgating_state = sdma_v3_0_set_clockgating_state, .set_powergating_state = sdma_v3_0_set_powergating_state, .get_clockgating_state = sdma_v3_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { @@ -1616,7 +1619,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: unused + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). 
* Used by the amdgpu ttm implementation to move pages if @@ -1626,7 +1629,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 43775cb67ff5..772604feb6ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2017,10 +2017,13 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t instance; + int instance; DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + return instance; + switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -2448,7 +2451,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine (VEGA10/12). * Used by the amdgpu ttm implementation to move pages if @@ -2458,11 +2461,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 
1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 34237a1b1f2e..2c55bfd935bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -44,6 +44,7 @@ #include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); #define mmSMNAID_AID0_MCA_SMU 0x03b30400 @@ -82,7 +83,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num) } } -static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) +static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id) { switch (client_id) { case SOC15_IH_CLIENTID_SDMA0: @@ -90,9 +91,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) case SOC15_IH_CLIENTID_SDMA1: return 1; case SOC15_IH_CLIENTID_SDMA2: - return 2; + if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) + return 0; + else + return 2; case SOC15_IH_CLIENTID_SDMA3: - return 3; + if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) + return 1; + else + return 3; default: return -EINVAL; } @@ -134,8 +141,8 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev) int ret, i; for (i = 0; i < adev->sdma.num_instances; i++) { - if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == - IP_VERSION(4, 4, 2)) { + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { ret = amdgpu_sdma_init_microcode(adev, 0, true); break; } else { @@ -368,7 +375,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 + << (ring->me % adev->sdma.num_inst_per_aid); sdma_v4_4_2_wait_reg_mem(ring, 0, 1, adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -1228,6 +1236,7 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 4, 2): + case IP_VERSION(4, 4, 5): return false; default: return false; @@ -1391,7 +1400,8 @@ static int sdma_v4_4_2_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) amdgpu_sdma_destroy_inst_ctx(adev, true); else amdgpu_sdma_destroy_inst_ctx(adev, false); @@ -1520,7 +1530,7 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev, uint32_t instance, i; DRM_DEBUG("IH: SDMA trap\n"); - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); /* Client id gives the SDMA instance in AID. To know the exact SDMA * instance, interrupt entry gives the node id which corresponds to the AID instance. 
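
The new adev argument to sdma_v4_4_2_irq_id_to_seq() above handles partitioned SR-IOV: a VF that owns a single XCC (adev->gfx.xcc_mask == 0x1) still sees interrupts arrive with the SDMA2/SDMA3 client ids, but it only has two logical SDMA instances, so those ids fold back onto 0 and 1. A standalone sketch of the mapping (client ids 0-3 stand in for the SOC15_IH_CLIENTID_SDMA* constants):

static int example_irq_id_to_seq(unsigned int client_id, bool single_xcc_vf)
{
	switch (client_id) {
	case 0:                         /* SDMA0 */
	case 1:                         /* SDMA1 */
		return client_id;
	case 2:                         /* SDMA2 */
	case 3:                         /* SDMA3 */
		return single_xcc_vf ? client_id - 2 : client_id;
	default:
		return -1;              /* the driver returns -EINVAL here */
	}
}
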
@@ -1563,7 +1573,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev, if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) goto out; - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0) goto out; @@ -1582,7 +1592,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev, DRM_ERROR("Illegal instruction in SDMA command stream\n"); - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0) return 0; @@ -1602,19 +1612,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL); - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, - DRAM_ECC_INT_ENABLE, 0); - WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); - break; - /* sdma ecc interrupt is enabled by default - * driver doesn't need to do anything to - * enable the interrupt */ - case AMDGPU_IRQ_STATE_ENABLE: - default: - break; - } + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); return 0; } @@ -1626,7 +1626,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_task_info *task_info; u64 addr; - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0 || instance >= adev->sdma.num_instances) { dev_err(adev->dev, "sdma instance invalid %d\n", instance); return -EINVAL; @@ -1954,7 +1954,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1964,11 +1964,11 @@ static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 
1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); @@ -2189,35 +2189,39 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, }; -static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_error_type type, - struct aca_bank_report *report, void *data) +static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { - u64 status, misc0; + struct aca_bank_info info; + u64 misc0; int ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - if ((type == ACA_ERROR_TYPE_UE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || - (type == ACA_ERROR_TYPE_CE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; - ret = aca_bank_info_decode(bank, &report->info); - if (ret) - return ret; - - misc0 = bank->regs[ACA_REG_IDX_MISC0]; - report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; } - return 0; + return ret; } /* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */ static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 }; static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { u32 instlo; @@ -2236,7 +2240,7 @@ static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_ } static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { - .aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report, + .aca_bank_parser = sdma_v4_4_2_aca_bank_parser, .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 883e8a1b8a40..b7d33d78bce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -999,7 +999,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } @@ -1805,7 +1806,7 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine (NAVI10). * Used by the amdgpu ttm implementation to move pages if @@ -1815,11 +1816,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 
1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 42f4bd250def..af1e90159ce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -176,6 +176,14 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " @@ -280,17 +288,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** @@ -835,7 +847,8 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } @@ -1642,6 +1655,10 @@ static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) * but it shouldn't hurt for other parts since * this GFXOFF will be disallowed anyway when SDMA is * active, this just makes it explicit. + * sdma_v5_2_ring_set_wptr() takes advantage of this + * to update the wptr because sometimes SDMA seems to miss + * doorbells when entering PG. If you remove this, update + * sdma_v5_2_ring_set_wptr() as well! 
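The dependency spelled out in this comment is easy to lose track of, so here is a toy model of the contract (purely illustrative, not driver code): the direct RB_WPTR register writes are only valid while the GFXOFF disallow count taken in begin_use() is non-zero.

#include <assert.h>

/* Toy refcount model: MMIO writes with GFXOFF engaged can hang the
 * chip, so set_wptr()'s register mirror is only legal between
 * begin_use() and end_use(). */
static int gfxoff_disallow_count;

static void begin_use(void) { gfxoff_disallow_count++; } /* amdgpu_gfx_off_ctrl(adev, false) */
static void end_use(void)   { gfxoff_disallow_count--; } /* amdgpu_gfx_off_ctrl(adev, true)  */

static void set_wptr_mmio(void)
{
    /* Stands in for the WREG32(...RB_WPTR...) calls added above. */
    assert(gfxoff_disallow_count > 0);
}

int main(void)
{
    begin_use();
    set_wptr_mmio();
    end_use();
    return 0;
}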
*/ amdgpu_gfx_off_ctrl(adev, false); } @@ -1747,7 +1764,7 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1757,11 +1774,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 361835a61f2e..dab4c2db8c9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -50,6 +50,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -507,6 +508,13 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) /* set minor_ptr_update to 0 after wptr programed */ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + /* Set up RESP_MODE to non-copy addresses */ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); @@ -854,7 +862,8 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } @@ -1567,7 +1576,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1577,11 +1586,11 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 
1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h index 6af23e7888ca..d8cf830916b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h @@ -91,6 +91,14 @@ #define SDMA_GCR_GLM_WB (1 << 4) #define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2) #define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0) + +#define SDMA_DCC_DATA_FORMAT(x) ((x) & 0x3f) +#define SDMA_DCC_NUM_TYPE(x) (((x) & 0x7) << 9) +#define SDMA_DCC_READ_CM(x) (((x) & 0x3) << 16) +#define SDMA_DCC_WRITE_CM(x) (((x) & 0x3) << 18) +#define SDMA_DCC_MAX_COM(x) (((x) & 0x3) << 24) +#define SDMA_DCC_MAX_UCOM(x) (((x) & 0x1) << 26) + /* ** Definitions for SDMA_PKT_COPY_LINEAR packet */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c new file mode 100644 index 000000000000..41b5e45697dc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -0,0 +1,1657 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_trace.h" + +#include "gc/gc_12_0_0_offset.h" +#include "gc/gc_12_0_0_sh_mask.h" +#include "hdp/hdp_6_0_0_offset.h" +#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" + +#include "soc15_common.h" +#include "soc15.h" +#include "sdma_v6_0_0_pkt_open.h" +#include "nbio_v4_3.h" +#include "sdma_common.h" +#include "sdma_v7_0.h" +#include "v12_structs.h" + +MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin"); +MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); + +#define SDMA1_REG_OFFSET 0x600 +#define SDMA0_HYP_DEC_REG_START 0x5880 +#define SDMA0_HYP_DEC_REG_END 0x589a +#define SDMA1_HYP_DEC_REG_OFFSET 0x20 + +static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev); +static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev); +static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev); +static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v7_0_start(struct amdgpu_device *adev); + +static u32 sdma_v7_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) +{ + u32 base; + + if (internal_offset >= SDMA0_HYP_DEC_REG_START && + internal_offset <= SDMA0_HYP_DEC_REG_END) { + base = adev->reg_offset[GC_HWIP][0][1]; + if (instance != 0) + internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance; + } else { + base = adev->reg_offset[GC_HWIP][0][0]; + if (instance == 1) + internal_offset += SDMA1_REG_OFFSET; + } + + return base + internal_offset; +} + +static unsigned sdma_v7_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) +{ + unsigned ret; + + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, 1); + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); + + return ret; +} + +/** + * sdma_v7_0_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware. + */ +static uint64_t sdma_v7_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + u64 *rptr; + + /* XXX check if swapping is necessary on BE */ + rptr = (u64 *)ring->rptr_cpu_addr; + + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); + return ((*rptr) >> 2); +} + +/** + * sdma_v7_0_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware. + */ +static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + u64 wptr = 0; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr)); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); + } + + return wptr >> 2; +} + +/** + * sdma_v7_0_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware. 
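One convention worth stating once: amdgpu keeps ring pointers in dword units while the hardware registers and the writeback slot hold byte offsets, which is why << 2 and >> 2 appear in matched pairs in the rptr/wptr helpers here. A trivial stand-alone check of the round trip:

#include <assert.h>
#include <stdint.h>

/* Mirrors the <<2 / >>2 pairing in sdma_v7_0_ring_get_rptr() and
 * sdma_v7_0_ring_set_wptr() (illustrative only). */
int main(void)
{
    uint64_t wptr_dw = 0x1234;          /* driver-side, dword units  */
    uint64_t wb_bytes = wptr_dw << 2;   /* what set_wptr() publishes */
    assert((wb_bytes >> 2) == wptr_dw); /* what get_rptr() recovers  */
    return 0;
}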
+ */
+static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+    struct amdgpu_device *adev = ring->adev;
+    uint32_t *wptr_saved;
+    uint32_t *is_queue_unmap;
+    uint64_t aggregated_db_index;
+    uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
+
+    DRM_DEBUG("Setting write pointer\n");
+
+    if (ring->is_mes_queue) {
+        wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+        is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+                          sizeof(uint32_t));
+        aggregated_db_index =
+            amdgpu_mes_get_aggregated_doorbell_index(adev,
+                                 ring->hw_prio);
+
+        atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+                 ring->wptr << 2);
+        *wptr_saved = ring->wptr << 2;
+        if (*is_queue_unmap) {
+            WDOORBELL64(aggregated_db_index, ring->wptr << 2);
+            DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+                  ring->doorbell_index, ring->wptr << 2);
+            WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+        } else {
+            DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+                  ring->doorbell_index, ring->wptr << 2);
+            WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+        }
+    } else {
+        if (ring->use_doorbell) {
+            DRM_DEBUG("Using doorbell -- "
+                  "wptr_offs == 0x%08x "
+                  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+                  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+                  ring->wptr_offs,
+                  lower_32_bits(ring->wptr << 2),
+                  upper_32_bits(ring->wptr << 2));
+            /* XXX check if swapping is necessary on BE */
+            atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+                     ring->wptr << 2);
+            DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+                  ring->doorbell_index, ring->wptr << 2);
+            WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+        } else {
+            DRM_DEBUG("Not using doorbell -- "
+                  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+                  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+                  ring->me,
+                  lower_32_bits(ring->wptr << 2),
+                  ring->me,
+                  upper_32_bits(ring->wptr << 2));
+            WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+                    ring->me,
+                    regSDMA0_QUEUE0_RB_WPTR),
+                    lower_32_bits(ring->wptr << 2));
+            WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+                    ring->me,
+                    regSDMA0_QUEUE0_RB_WPTR_HI),
+                    upper_32_bits(ring->wptr << 2));
+        }
+    }
+}
+
+static void sdma_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+    struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+    int i;
+
+    for (i = 0; i < count; i++)
+        if (sdma && sdma->burst_nop && (i == 0))
+            amdgpu_ring_write(ring, ring->funcs->nop |
+                      SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+        else
+            amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v7_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+                   struct amdgpu_job *job,
+                   struct amdgpu_ib *ib,
+                   uint32_t flags)
+{
+    unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+    uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+    /* An IB packet must end on an 8-DW boundary; that is, the next
+     * packet must start on an 8-dword boundary. Our IB packet below is
+     * 6 dwords long, so add x NOPs such that, in modular arithmetic,
+     * wptr + 6 + x = 8k, k >= 0, which in C is
+     * (wptr + 6 + x) % 8 = 0.
+     * The expression below is a solution for x.
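Since the comment leans on a little modular arithmetic, here is a brute-force verification (stand-alone C, not driver code) that the padding formula used below does solve (wptr + 6 + x) % 8 == 0 for every possible wptr:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* For any wptr, inserting x = (2 - wptr) & 7 NOPs lands the
     * 6-dword IB packet on an 8-dword boundary.  Only wptr mod 8
     * matters, so checking 0..7 is exhaustive. */
    for (uint32_t wptr = 0; wptr < 8; wptr++) {
        uint32_t x = (2 - wptr) & 7; /* unsigned wrap is intended */
        assert((wptr + x + 6) % 8 == 0);
    }
    return 0;
}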
+ */ + sdma_v7_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); + + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) | + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); + amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); +} + +/** + * sdma_v7_0_ring_emit_mem_sync - flush the IB by graphics cache rinse + * + * @ring: amdgpu ring pointer + * + * flush the IB by graphics cache rinse. + */ +static void sdma_v7_0_ring_emit_mem_sync(struct amdgpu_ring *ring) +{ + uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV | + SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV | + SDMA_GCR_GLI_INV(1); + + /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) | + SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) | + SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) | + SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0)); +} + + +/** + * sdma_v7_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring + * + * @ring: amdgpu ring pointer + * + * Emit an hdp flush packet on the requested DMA ring. + */ +static void sdma_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask = 0; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ +} + +/** + * sdma_v7_0_ring_emit_fence - emit a fence on the DMA ring + * + * @ring: amdgpu ring pointer + * @addr: address + * @seq: fence seq number + * @flags: fence flags + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed. 
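The interrupt-context dword written by the fence's trailing trap packet (function below) is decoded again in sdma_v7_0_process_trap_irq() near the end of the file. A sketch of the encode/decode round trip; the flag and mask values are illustrative stand-ins for the real AMDGPU_FENCE_MES_QUEUE_* constants in amdgpu.h and may differ:

#include <assert.h>
#include <stdint.h>

/* Assumed placeholder values, not the real driver constants. */
#define MES_QUEUE_FLAG    0x80000000u
#define MES_QUEUE_ID_MASK 0x7fffffffu

int main(void)
{
    uint32_t hw_queue_id = 42;
    /* encode: what emit_fence() stuffs into the trap packet */
    uint32_t ctx = hw_queue_id | MES_QUEUE_FLAG;
    /* decode: what process_trap_irq() does with src_data[0] */
    assert(ctx & MES_QUEUE_FLAG);
    assert((ctx & MES_QUEUE_ID_MASK) == hw_queue_id);
    return 0;
}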
+ */ +static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + /* write the fence */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + /* optionally write high bits as well */ + if (write64bit) { + addr += 4; + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } + + if (flags & AMDGPU_FENCE_FLAG_INT) { + uint32_t ctx = ring->is_mes_queue ? + (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; + /* generate an interrupt */ + amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + } +} + +/** + * sdma_v7_0_gfx_stop - stop the gfx async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the gfx async dma ring buffers. + */ +static void sdma_v7_0_gfx_stop(struct amdgpu_device *adev) +{ + u32 rb_cntl, ib_cntl; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + } +} + +/** + * sdma_v7_0_rlc_stop - stop the compute async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the compute async dma queues. + */ +static void sdma_v7_0_rlc_stop(struct amdgpu_device *adev) +{ + /* XXX todo */ +} + +/** + * sdma_v7_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch. + */ +static void sdma_v7_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ +} + +/** + * sdma_v7_0_enable - stop the async dma engines + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. + * + * Halt or unhalt the async dma engines. + */ +static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable) +{ + u32 mcu_cntl; + int i; + + if (!enable) { + sdma_v7_0_gfx_stop(adev); + sdma_v7_0_rlc_stop(adev); + } + + if (amdgpu_sriov_vf(adev)) + return; + + for (i = 0; i < adev->sdma.num_instances; i++) { + mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); + mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_MCU_CNTL, HALT, enable ? 0 : 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), mcu_cntl); + } +} + +/** + * sdma_v7_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure. 
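The ring bring-up that follows programs RB_SIZE as order_base_2(ring->ring_size / 4), i.e. log2 of the ring size in dwords. A worked example with a plain-C stand-in for the kernel's order_base_2():

#include <assert.h>

/* Stand-in for the kernel's order_base_2(): smallest order with
 * (1 << order) >= n. */
static unsigned int order_base_2(unsigned int n)
{
    unsigned int order = 0;

    while ((1u << order) < n)
        order++;
    return order;
}

int main(void)
{
    /* A 16 KiB ring is 4096 dwords, so RB_SIZE is programmed as 12. */
    assert(order_base_2(16384 / 4) == 12);
    return 0;
}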
+ */
+static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+{
+    struct amdgpu_ring *ring;
+    u32 rb_cntl, ib_cntl;
+    u32 rb_bufsz;
+    u32 doorbell;
+    u32 doorbell_offset;
+    u32 tmp;
+    u64 wptr_gpu_addr;
+    int i, r;
+
+    for (i = 0; i < adev->sdma.num_instances; i++) {
+        ring = &adev->sdma.instance[i].ring;
+
+        //if (!amdgpu_sriov_vf(adev))
+        //    WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+        /* Set ring buffer size in dwords */
+        rb_bufsz = order_base_2(ring->ring_size / 4);
+        rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+                    RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+        /* Initialize the ring buffer's read and write pointers */
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+
+        /* setup the wptr shadow polling */
+        wptr_gpu_addr = ring->wptr_gpu_addr;
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+                lower_32_bits(wptr_gpu_addr));
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+                upper_32_bits(wptr_gpu_addr));
+
+        /* set the wb address whether it's enabled or not */
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+                upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+                lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+        if (amdgpu_sriov_vf(adev))
+            rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
+        else
+            rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
+
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+        ring->wptr = 0;
+
+        /* before programming wptr to a smaller value, minor_ptr_update must be set first */
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+        if (!amdgpu_sriov_vf(adev)) { /* only bare metal uses register writes for wptr */
+            WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+            WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+        }
+
+        doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+        doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+
+        if (ring->use_doorbell) {
+            doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+            doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+                            OFFSET, ring->doorbell_index);
+        } else {
+            doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+        }
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+        if (i == 0)
+            adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+                                  ring->doorbell_index,
+                                  adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+        if (amdgpu_sriov_vf(adev))
+            sdma_v7_0_ring_set_wptr(ring);
+
+        /* set minor_ptr_update to 0 after wptr is programmed */
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+        /* Set up sdma hang watchdog */
+        tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+        /* 100ms per unit */
+        tmp = REG_SET_FIELD(tmp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+                    max(adev->usec_timeout/100000, 1));
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), tmp);
+
+        /* Set up RESP_MODE to non-copy addresses */
+        tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+        tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+        tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), tmp);
+
+        /* program default cache read and write policy */
+        tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+        /* clean read policy and write policy bits */
+        tmp &= 0xFF0FFF;
+        tmp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+            (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), tmp);
+
+        if (!amdgpu_sriov_vf(adev)) {
+            /* unhalt engine */
+            tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+            tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, HALT, 0);
+            tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, RESET, 0);
+            WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp);
+        }
+
+        /* enable DMA RB */
+        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+        ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+        ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+        ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+        /* enable DMA IBs */
+        WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+
+        ring->sched.ready = true;
+
+        if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
+            sdma_v7_0_ctx_switch_enable(adev, true);
+            sdma_v7_0_enable(adev, true);
+        }
+
+        r = amdgpu_ring_test_helper(ring);
+        if (r) {
+            ring->sched.ready = false;
+            return r;
+        }
+
+    }
+
+    return 0;
+}
+
+/**
+ * sdma_v7_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
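Both the sdma_v6_0 hunk earlier and the bring-up just above program QUEUE_HANG_COUNT in 100 ms units, clamped so it can never be zero. A small stand-alone model of the max(adev->usec_timeout / 100000, 1) expression:

#include <assert.h>

/* The watchdog counts in 100 ms units; short timeouts clamp to 1. */
static unsigned int hang_count(unsigned int usec_timeout)
{
    unsigned int units = usec_timeout / 100000;

    return units ? units : 1;
}

int main(void)
{
    assert(hang_count(100000) == 1);   /* 100 ms -> 1 unit   */
    assert(hang_count(2000000) == 20); /* 2 s    -> 20 units */
    assert(hang_count(50000) == 1);    /* clamped to 1 unit  */
    return 0;
}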
+ */ +static int sdma_v7_0_rlc_resume(struct amdgpu_device *adev) +{ + return 0; +} + +static void sdma_v12_0_free_ucode_buffer(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj, + &adev->sdma.instance[i].sdma_fw_gpu_addr, + (void **)&adev->sdma.instance[i].sdma_fw_ptr); + } +} + +/** + * sdma_v7_0_load_microcode - load the sDMA ME ucode + * + * @adev: amdgpu_device pointer + * + * Loads the sDMA0/1 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int sdma_v7_0_load_microcode(struct amdgpu_device *adev) +{ + const struct sdma_firmware_header_v3_0 *hdr; + const __le32 *fw_data; + u32 fw_size; + uint32_t tmp, sdma_status, ic_op_cntl; + int i, r, j; + + /* halt the MEs */ + sdma_v7_0_enable(adev, false); + + if (!adev->sdma.instance[0].fw) + return -EINVAL; + + hdr = (const struct sdma_firmware_header_v3_0 *) + adev->sdma.instance[0].fw->data; + amdgpu_ucode_print_sdma_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data + + le32_to_cpu(hdr->ucode_offset_bytes)); + fw_size = le32_to_cpu(hdr->ucode_size_bytes); + + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_bo_create_reserved(adev, fw_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->sdma.instance[i].sdma_fw_obj, + &adev->sdma.instance[i].sdma_fw_gpu_addr, + (void **)&adev->sdma.instance[i].sdma_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r); + return r; + } + + memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj); + amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj); + + tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL)); + tmp = REG_SET_FIELD(tmp, SDMA0_IC_CNTL, GPA, 0); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL), tmp); + + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_LO), + lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_HI), + upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr)); + + tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL)); + tmp = REG_SET_FIELD(tmp, SDMA0_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL), tmp); + + /* Wait for sdma ucode init complete */ + for (j = 0; j < adev->usec_timeout; j++) { + ic_op_cntl = RREG32_SOC15_IP(GC, + sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL)); + sdma_status = RREG32_SOC15_IP(GC, + sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG)); + if ((REG_GET_FIELD(ic_op_cntl, SDMA0_IC_OP_CNTL, ICACHE_PRIMED) == 1) && + (REG_GET_FIELD(sdma_status, SDMA0_STATUS_REG, UCODE_INIT_DONE) == 1)) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) { + dev_err(adev->dev, "failed to init sdma ucode\n"); + return -EINVAL; + } + } + + return 0; +} + +static int sdma_v7_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 tmp; + int i; + + sdma_v7_0_gfx_stop(adev); + + for (i = 0; i < adev->sdma.num_instances; i++) { + //tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE)); + //tmp |= SDMA0_FREEZE__FREEZE_MASK; + //WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp); + tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); + 
tmp |= SDMA0_MCU_CNTL__HALT_MASK; + tmp |= SDMA0_MCU_CNTL__RESET_MASK; + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp); + + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0); + + udelay(100); + + tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i; + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + + udelay(100); + + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + + udelay(100); + } + + return sdma_v7_0_start(adev); +} + +static bool sdma_v7_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + long tmo = msecs_to_jiffies(1000); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + return false; +} + +/** + * sdma_v7_0_start - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the DMA engines and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v7_0_start(struct amdgpu_device *adev) +{ + int r = 0; + + if (amdgpu_sriov_vf(adev)) { + sdma_v7_0_ctx_switch_enable(adev, false); + sdma_v7_0_enable(adev, false); + + /* set RB registers */ + r = sdma_v7_0_gfx_resume(adev); + return r; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = sdma_v7_0_load_microcode(adev); + if (r) { + sdma_v12_0_free_ucode_buffer(adev); + return r; + } + + if (amdgpu_emu_mode == 1) + msleep(1000); + } + + /* unhalt the MEs */ + sdma_v7_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v7_0_ctx_switch_enable(adev, true); + + /* start the gfx rings and rlc compute queues */ + r = sdma_v7_0_gfx_resume(adev); + if (r) + return r; + r = sdma_v7_0_rlc_resume(adev); + + return r; +} + +static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd, + struct amdgpu_mqd_prop *prop) +{ + struct v12_sdma_mqd *m = mqd; + uint64_t wb_gpu_addr; + + m->sdmax_rlcx_rb_cntl = + order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT | + 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT | + 1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT; + + m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8); + m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8); + + wb_gpu_addr = prop->wptr_gpu_addr; + m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr); + m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr); + + wb_gpu_addr = prop->rptr_gpu_addr; + m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr); + m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr); + + m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, 0, + regSDMA0_QUEUE0_IB_CNTL)); + + m->sdmax_rlcx_doorbell_offset = + prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + + m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); + + m->sdmax_rlcx_doorbell_log = 0; + m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; + m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + + return 0; +} + +static void sdma_v7_0_set_mqd_funcs(struct amdgpu_device *adev) +{ + adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd); + adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = 
sdma_v7_0_mqd_init;
+}
+
+/**
+ * sdma_v7_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by using it to write a
+ * value to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+    struct amdgpu_device *adev = ring->adev;
+    unsigned i;
+    unsigned index;
+    int r;
+    u32 tmp;
+    u64 gpu_addr;
+    volatile uint32_t *cpu_ptr = NULL;
+
+    tmp = 0xCAFEDEAD;
+
+    if (ring->is_mes_queue) {
+        uint32_t offset = 0;
+        offset = amdgpu_mes_ctx_get_offs(ring,
+                         AMDGPU_MES_CTX_PADDING_OFFS);
+        gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+        cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+        *cpu_ptr = tmp;
+    } else {
+        r = amdgpu_device_wb_get(adev, &index);
+        if (r) {
+            dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+            return r;
+        }
+
+        gpu_addr = adev->wb.gpu_addr + (index * 4);
+        adev->wb.wb[index] = cpu_to_le32(tmp);
+    }
+
+    r = amdgpu_ring_alloc(ring, 5);
+    if (r) {
+        DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+        if (!ring->is_mes_queue)
+            amdgpu_device_wb_free(adev, index);
+        return r;
+    }
+
+    amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+              SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+    amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+    amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+    amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+    amdgpu_ring_write(ring, 0xDEADBEEF);
+    amdgpu_ring_commit(ring);
+
+    for (i = 0; i < adev->usec_timeout; i++) {
+        if (ring->is_mes_queue)
+            tmp = le32_to_cpu(*cpu_ptr);
+        else
+            tmp = le32_to_cpu(adev->wb.wb[index]);
+        if (tmp == 0xDEADBEEF)
+            break;
+        if (amdgpu_emu_mode == 1)
+            msleep(1);
+        else
+            udelay(1);
+    }
+
+    if (i >= adev->usec_timeout)
+        r = -ETIMEDOUT;
+
+    if (!ring->is_mes_queue)
+        amdgpu_device_wb_free(adev, index);
+
+    return r;
+}
+
+/**
+ * sdma_v7_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
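A packet-format note before the IB test body: SDMA count fields are N-1 encoded, so the SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0) used by both tests transfers exactly one dword (the 0xDEADBEEF marker). As a one-line illustrative model:

#include <assert.h>
#include <stdint.h>

/* SDMA count fields transfer field-value + 1 units. */
static uint32_t units_transferred(uint32_t count_field)
{
    return count_field + 1;
}

int main(void)
{
    assert(units_transferred(0) == 1);     /* the tests' single dword */
    assert(units_transferred(255) == 256);
    return 0;
}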
+ */ +static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + long r; + u32 tmp = 0; + u64 gpu_addr; + volatile uint32_t *cpu_ptr = NULL; + + tmp = 0xCAFEDEAD; + memset(&ib, 0, sizeof(ib)); + + if (ring->is_mes_queue) { + uint32_t offset = 0; + offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); + ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + + offset = amdgpu_mes_ctx_get_offs(ring, + AMDGPU_MES_CTX_PADDING_OFFS); + gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); + cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + *cpu_ptr = tmp; + } else { + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; + } + } + + ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib.ptr[1] = lower_32_bits(gpu_addr); + ib.ptr[2] = upper_32_bits(gpu_addr); + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); + ib.ptr[4] = 0xDEADBEEF; + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err1; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; + goto err1; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err1; + } + + if (ring->is_mes_queue) + tmp = le32_to_cpu(*cpu_ptr); + else + tmp = le32_to_cpu(adev->wb.wb[index]); + + if (tmp == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; + +err1: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err0: + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); + return r; +} + + +/** + * sdma_v7_0_vm_copy_pte - update PTEs by copying them from the GART + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @src: src addr to copy from + * @count: number of page entries to update + * + * Update PTEs by copying them from the GART using sDMA. + */ +static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib, + uint64_t pe, uint64_t src, + unsigned count) +{ + unsigned bytes = count * 8; + + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = bytes - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + +} + +/** + * sdma_v7_0_vm_write_pte - update PTEs by writing them manually + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @value: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * + * Update PTEs by writing them manually using sDMA. 
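The two PTE-update paths trade IB space against an extra GART read: vm_copy_pte() above emits a fixed 7-dword packet however many entries it moves, while vm_write_pte() below inlines two dwords per entry after a 4-dword header. A quick cost model (stand-alone; header sizes taken from the packet layouts shown here):

#include <assert.h>

/* IB dwords consumed per path, per the packet layouts above/below. */
static unsigned int copy_pte_dw(unsigned int count)  { (void)count; return 7; }
static unsigned int write_pte_dw(unsigned int count) { return 4 + 2 * count; }

int main(void)
{
    /* For 512 PTEs the copy path stays at 7 dwords but must fetch
     * 512 * 8 = 4096 bytes of pre-built PTEs from the GART, while
     * the write path inlines 1024 payload dwords directly. */
    assert(copy_pte_dw(512) == 7);
    assert(write_pte_dw(512) == 1028);
    return 0;
}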
+ */ +static void sdma_v7_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, + uint64_t value, unsigned count, + uint32_t incr) +{ + unsigned ndw = count * 2; + + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw - 1; + for (; ndw > 0; ndw -= 2) { + ib->ptr[ib->length_dw++] = lower_32_bits(value); + ib->ptr[ib->length_dw++] = upper_32_bits(value); + value += incr; + } +} + +/** + * sdma_v7_0_vm_set_pte_pde - update the page tables using sDMA + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA. + */ +static void sdma_v7_0_vm_set_pte_pde(struct amdgpu_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ +} + +/** + * sdma_v7_0_ring_pad_ib - pad the IB + * + * @ring: amdgpu ring pointer + * @ib: indirect buffer to fill with padding + * + * Pad the IB with NOPs to a boundary multiple of 8. + */ +static void sdma_v7_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + u32 pad_count; + int i; + + pad_count = (-ib->length_dw) & 0x7; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP); +} + +/** + * sdma_v7_0_ring_emit_pipeline_sync - sync the pipeline + * + * @ring: amdgpu_ring pointer + * + * Make sure all previous operations are completed (CIK). 
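sdma_v7_0_ring_pad_ib() just above uses the same negate-and-mask trick as emit_ib; a brute-force check (stand-alone C) that (-length_dw) & 7 always pads the IB to a multiple of 8 dwords:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* (-len) & 7 tops any IB length up to the next multiple of 8;
     * unsigned wraparound of the negation is intended. */
    for (uint32_t len = 0; len < 64; len++) {
        uint32_t pad = (-len) & 0x7;
        assert((len + pad) % 8 == 0);
    }
    return 0;
}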
+ */
+static void sdma_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+    uint32_t seq = ring->fence_drv.sync_seq;
+    uint64_t addr = ring->fence_drv.gpu_addr;
+
+    /* wait for idle */
+    amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+              SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+              SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+              SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+    amdgpu_ring_write(ring, addr & 0xfffffffc);
+    amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+    amdgpu_ring_write(ring, seq); /* reference */
+    amdgpu_ring_write(ring, 0xffffffff); /* mask */
+    amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+              SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v7_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+                     unsigned vmid, uint64_t pd_addr)
+{
+    amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+                     uint32_t reg, uint32_t val)
+{
+    /* The SRBM WRITE command is not supported on SDMA v7.
+     * Use the Register WRITE command instead; its opcode is the same
+     * as SRBM WRITE.
+     */
+    amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
+    amdgpu_ring_write(ring, reg << 2);
+    amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+                     uint32_t val, uint32_t mask)
+{
+    amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+              SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+              SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+    amdgpu_ring_write(ring, reg << 2);
+    amdgpu_ring_write(ring, 0);
+    amdgpu_ring_write(ring, val); /* reference */
+    amdgpu_ring_write(ring, mask); /* mask */
+    amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+              SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+                           uint32_t reg0, uint32_t reg1,
+                           uint32_t ref, uint32_t mask)
+{
+    amdgpu_ring_emit_wreg(ring, reg0, ref);
+    /* wait for a cycle to reset vm_inv_eng*_ack */
+    amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+    amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+static int sdma_v7_0_early_init(void *handle)
+{
+    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+    int r;
+
+    r = amdgpu_sdma_init_microcode(adev, 0, true);
+    if (r) {
+        DRM_ERROR("Failed to init sdma firmware!\n");
+        return r;
+    }
+
+    sdma_v7_0_set_ring_funcs(adev);
+    sdma_v7_0_set_buffer_funcs(adev);
+    sdma_v7_0_set_vm_pte_funcs(adev);
+    sdma_v7_0_set_irq_funcs(adev);
+    sdma_v7_0_set_mqd_funcs(adev);
+
+    return 0;
+}
+
+static int sdma_v7_0_sw_init(void *handle)
+{
+    struct amdgpu_ring *ring;
+    int r, i;
+    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+    /* SDMA trap event */
+    r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+                  GFX_11_0_0__SRCID__SDMA_TRAP,
+                  &adev->sdma.trap_irq);
+    if (r)
+        return r;
+
+    for (i = 0; i < adev->sdma.num_instances; i++) {
+        ring = &adev->sdma.instance[i].ring;
+        ring->ring_obj = NULL;
+        ring->use_doorbell = true;
+        ring->me = i;
+
+        DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+              ring->use_doorbell?"true":"false");
+
+        ring->doorbell_index =
+            (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD
offset + + ring->vm_hub = AMDGPU_GFXHUB(0); + sprintf(ring->name, "sdma%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + } + + return r; +} + +static int sdma_v7_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + + amdgpu_sdma_destroy_inst_ctx(adev, true); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) + sdma_v12_0_free_ucode_buffer(adev); + + return 0; +} + +static int sdma_v7_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v7_0_start(adev); +} + +static int sdma_v7_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + sdma_v7_0_ctx_switch_enable(adev, false); + sdma_v7_0_enable(adev, false); + + return 0; +} + +static int sdma_v7_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v7_0_hw_fini(adev); +} + +static int sdma_v7_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v7_0_hw_init(adev); +} + +static bool sdma_v7_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + u32 tmp = RREG32(sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG)); + + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) + return false; + } + + return true; +} + +static int sdma_v7_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 sdma0, sdma1; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + sdma0 = RREG32(sdma_v7_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v7_0_get_reg_offset(adev, 1, regSDMA0_STATUS_REG)); + + if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int sdma_v7_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + u32 index = 0; + u64 sdma_gfx_preempt; + + amdgpu_sdma_get_index_from_ring(ring, &index); + sdma_gfx_preempt = + sdma_v7_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT); + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* emit the trailing fence */ + ring->trail_seq += 1; + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("ring %d failed to be allocated \n", ring->idx); + return r; + } + sdma_v7_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, 0); + amdgpu_ring_commit(ring); + + /* assert IB preemption */ + WREG32(sdma_gfx_preempt, 1); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to be preempted\n", ring->idx); + } + + /* deassert IB preemption */ + WREG32(sdma_gfx_preempt, 0); + + /* deassert the preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static int sdma_v7_0_set_trap_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_cntl; + + u32 reg_offset = 
sdma_v7_0_get_reg_offset(adev, type, regSDMA0_CNTL);
+
+	sdma_cntl = RREG32(reg_offset);
+	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+				  state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+	WREG32(reg_offset, sdma_cntl);
+
+	return 0;
+}
+
+static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	int instance, queue;
+	uint32_t mes_queue_id = entry->src_data[0];
+
+	DRM_DEBUG("IH: SDMA trap\n");
+
+	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+		struct amdgpu_mes_queue *queue;
+
+		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+		spin_lock(&adev->mes.queue_id_lock);
+		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+		if (queue) {
+			DRM_DEBUG("process sdma queue id = %u\n", mes_queue_id);
+			amdgpu_fence_process(queue->ring);
+		}
+		spin_unlock(&adev->mes.queue_id_lock);
+		return 0;
+	}
+
+	queue = entry->ring_id & 0xf;
+	instance = (entry->ring_id & 0xf0) >> 4;
+	if (instance > 1) {
+		DRM_ERROR("IH: wrong ring_id detected, invalid sdma instance\n");
+		return -EINVAL;
+	}
+
+	switch (entry->client_id) {
+	case SOC21_IH_CLIENTID_GFX:
+		switch (queue) {
+		case 0:
+			amdgpu_fence_process(&adev->sdma.instance[instance].ring);
+			break;
+		default:
+			break;
+		}
+		break;
+	}
+	return 0;
+}
+
+static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+					      struct amdgpu_irq_src *source,
+					      struct amdgpu_iv_entry *entry)
+{
+	return 0;
+}
+
+static int sdma_v7_0_set_clockgating_state(void *handle,
+					   enum amd_clockgating_state state)
+{
+	return 0;
+}
+
+static int sdma_v7_0_set_powergating_state(void *handle,
+					   enum amd_powergating_state state)
+{
+	return 0;
+}
+
+static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags)
+{
+}
+
+const struct amd_ip_funcs sdma_v7_0_ip_funcs = {
+	.name = "sdma_v7_0",
+	.early_init = sdma_v7_0_early_init,
+	.late_init = NULL,
+	.sw_init = sdma_v7_0_sw_init,
+	.sw_fini = sdma_v7_0_sw_fini,
+	.hw_init = sdma_v7_0_hw_init,
+	.hw_fini = sdma_v7_0_hw_fini,
+	.suspend = sdma_v7_0_suspend,
+	.resume = sdma_v7_0_resume,
+	.is_idle = sdma_v7_0_is_idle,
+	.wait_for_idle = sdma_v7_0_wait_for_idle,
+	.soft_reset = sdma_v7_0_soft_reset,
+	.check_soft_reset = sdma_v7_0_check_soft_reset,
+	.set_clockgating_state = sdma_v7_0_set_clockgating_state,
+	.set_powergating_state = sdma_v7_0_set_powergating_state,
+	.get_clockgating_state = sdma_v7_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = {
+	.type = AMDGPU_RING_TYPE_SDMA,
+	.align_mask = 0xf,
+	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+	.support_64bit_ptrs = true,
+	.secure_submission_supported = true,
+	.get_rptr = sdma_v7_0_ring_get_rptr,
+	.get_wptr = sdma_v7_0_ring_get_wptr,
+	.set_wptr = sdma_v7_0_ring_set_wptr,
+	.emit_frame_size =
+		5 + /* sdma_v7_0_ring_init_cond_exec */
+		6 + /* sdma_v7_0_ring_emit_hdp_flush */
+		6 + /* sdma_v7_0_ring_emit_pipeline_sync */
+		/* sdma_v7_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		10 + 10 + 10, /* sdma_v7_0_ring_emit_fence x3 for user fence, vm fence */
+	.emit_ib_size = 5 + 7 + 6, /* sdma_v7_0_ring_emit_ib */
+	.emit_ib = sdma_v7_0_ring_emit_ib,
+	.emit_mem_sync = sdma_v7_0_ring_emit_mem_sync,
+	.emit_fence = sdma_v7_0_ring_emit_fence,
+	.emit_pipeline_sync = sdma_v7_0_ring_emit_pipeline_sync,
+	.emit_vm_flush = sdma_v7_0_ring_emit_vm_flush,
+	.emit_hdp_flush = sdma_v7_0_ring_emit_hdp_flush,
+	.test_ring = sdma_v7_0_ring_test_ring,
+
.test_ib = sdma_v7_0_ring_test_ib, + .insert_nop = sdma_v7_0_ring_insert_nop, + .pad_ib = sdma_v7_0_ring_pad_ib, + .emit_wreg = sdma_v7_0_ring_emit_wreg, + .emit_reg_wait = sdma_v7_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait, + .init_cond_exec = sdma_v7_0_ring_init_cond_exec, + .preempt_ib = sdma_v7_0_ring_preempt_ib, +}; + +static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].ring.funcs = &sdma_v7_0_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } +} + +static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = { + .set = sdma_v7_0_set_trap_irq_state, + .process = sdma_v7_0_process_trap_irq, +}; + +static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = { + .process = sdma_v7_0_process_illegal_inst_irq, +}; + +static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + + adev->sdma.num_instances; + adev->sdma.trap_irq.funcs = &sdma_v7_0_trap_irq_funcs; + adev->sdma.illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs; +} + +/** + * sdma_v7_0_emit_copy_buffer - copy buffer using the sDMA engine + * + * @ib: indirect buffer to fill with commands + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * @copy_flags: copy flags for the buffers + * + * Copy GPU buffers using the DMA engine. + * Used by the amdgpu ttm implementation to move pages if + * registered as the asic copy callback. + */ +static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, + uint64_t src_offset, + uint64_t dst_offset, + uint32_t byte_count, + uint32_t copy_flags) +{ + uint32_t num_type, data_format, max_com; + + max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED); + data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT); + num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE); + + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | + SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & + (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + + ib->ptr[ib->length_dw++] = byte_count - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + + if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED))) + ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) | + ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | + ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | + SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); +} + +/** + * sdma_v7_0_emit_fill_buffer - fill buffer using the sDMA engine + * + * @ib: indirect buffer to fill + * @src_data: value to write to buffer + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Fill GPU buffers using the DMA engine. 
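+ *
+ * A minimal illustrative call, assuming @ib still has at least five free
+ * dwords and page_gpu_addr is a valid GPU VA (both hypothetical here):
+ *
+ *	sdma_v7_0_emit_fill_buffer(ib, 0xdeadbeef, page_gpu_addr, 4096);
+ *
+ * which emits a single CONST_FILL packet: header, 64-bit destination
+ * address, the 32-bit fill value, then byte_count - 1.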
+ */ +static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, + uint32_t src_data, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count - 1; +} + +static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { + .copy_max_bytes = 0x400000, + .copy_num_dw = 7, + .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, + .fill_max_bytes = 0x400000, + .fill_num_dw = 5, + .emit_fill_buffer = sdma_v7_0_emit_fill_buffer, +}; + +static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev) +{ + adev->mman.buffer_funcs = &sdma_v7_0_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; +} + +static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v7_0_vm_copy_pte, + .write_pte = sdma_v7_0_vm_write_pte, + .set_pte_pde = sdma_v7_0_vm_set_pte_pde, +}; + +static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev) +{ + unsigned i; + + adev->vm_manager.vm_pte_funcs = &sdma_v7_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->vm_manager.vm_pte_scheds[i] = + &adev->sdma.instance[i].ring.sched; + } + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; +} + +const struct amdgpu_ip_block_version sdma_v7_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &sdma_v7_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h new file mode 100644 index 000000000000..5af863bb39c4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __SDMA_V7_0_H__ +#define __SDMA_V7_0_H__ + +extern const struct amd_ip_funcs sdma_v7_0_ip_funcs; +extern const struct amdgpu_ip_block_version sdma_v7_0_ip_block; + +#endif /* __SDMA_V7_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 23e4ef4fff7c..85235470e872 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1409,9 +1409,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool si_asic_supports_baco(struct amdgpu_device *adev) +static int si_asic_supports_baco(struct amdgpu_device *adev) { - return false; + return 0; } static enum amd_reset_method @@ -2706,6 +2706,8 @@ static const struct amd_ip_funcs si_common_ip_funcs = { .soft_reset = si_common_soft_reset, .set_clockgating_state = si_common_set_clockgating_state, .set_powergating_state = si_common_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version si_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 9aa0e11ee673..11db5b755832 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -708,6 +708,8 @@ static const struct amd_ip_funcs si_dma_ip_funcs = { .soft_reset = si_dma_soft_reset, .set_clockgating_state = si_dma_set_clockgating_state, .set_powergating_state = si_dma_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs si_dma_ring_funcs = { @@ -761,7 +763,7 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: is this a secure operation + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). 
* Used by the amdgpu ttm implementation to move pages if @@ -771,7 +773,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, byte_count); diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index cada9f300a7f..5237395e4fab 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -296,6 +296,8 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { .soft_reset = si_ih_soft_reset, .set_clockgating_state = si_ih_set_clockgating_state, .set_powergating_state = si_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs si_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 93f6772d1b24..481217c32d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -92,7 +92,7 @@ static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index 04c797d54511..0af648931df5 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -91,7 +91,7 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c new file mode 100644 index 000000000000..2a51a70d4846 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c @@ -0,0 +1,62 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "smuio_v14_0_2.h" +#include "smuio/smuio_14_0_2_offset.h" +#include "smuio/smuio_14_0_2_sh_mask.h" +#include <linux/preempt.h> + +static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX); +} + +static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA); +} + +static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + u64 clock; + u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; + + preempt_disable(); + clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + /* the clock counter may be udpated during polling the counters */ + clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + preempt_enable(); + + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); + + return clock; +} + +const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = { + .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset, + .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset, + .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h new file mode 100644 index 000000000000..6e617f832d90 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __SMUIO_V14_0_2_H__ +#define __SMUIO_V14_0_2_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs; + +#endif /* __SMUIO_V14_0_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index dec81ccf6240..8d16dacdc172 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -143,7 +143,7 @@ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -156,7 +156,7 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode = static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -326,7 +326,8 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) || - amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6)) + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14)) return 10000; if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 1)) @@ -502,7 +503,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset = false; + int baco_reset = 0; bool connected_to_cpu = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -540,7 +541,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) */ if (ras && adev->ras_enabled && adev->pm.fw_version <= 0x283400) - baco_reset = false; + baco_reset = 0; } else { baco_reset = amdgpu_dpm_is_baco_supported(adev); } @@ -554,6 +555,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE2; break; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): /* Use gpu_recovery param to target a reset method. * Enable triggering of GPU reset only if specified * by module parameter. 
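The bool-to-int conversion of the supports_baco callbacks in the next hunk (and in the matching si.c and soc24.c changes elsewhere in this series) lets the query report more than a yes/no answer, for example distinguishing plain BACO from richer flavors of BACO support. A caller-side sketch of the widened convention; asic_can_baco() is a hypothetical helper, not part of the patch:

	static bool asic_can_baco(struct amdgpu_device *adev)
	{
		/* assumed convention: > 0 means some form of BACO is
		 * supported, 0 means unsupported
		 */
		return adev->asic_funcs->supports_baco(adev) > 0;
	}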
@@ -620,7 +622,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) } } -static bool soc15_supports_baco(struct amdgpu_device *adev) +static int soc15_supports_baco(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(9, 0, 0): @@ -628,13 +630,13 @@ static bool soc15_supports_baco(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20) { if (adev->psp.sos.fw_version >= 0x80067) return amdgpu_dpm_is_baco_supported(adev); - return false; + return 0; } else { return amdgpu_dpm_is_baco_supported(adev); } break; default: - return false; + return 0; } } @@ -929,13 +931,9 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = static int soc15_common_early_init(void *handle) { -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_sriov_vf(adev)) { - adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; - adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; - } + adev->nbio.funcs->set_reg_remap(adev); adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &amdgpu_device_indirect_rreg; @@ -1172,6 +1170,7 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x3c; break; case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): adev->asic_funcs = &aqua_vanjaram_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | @@ -1183,12 +1182,8 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; + /*TODO: need a new external_rev_id for GC 9.4.4? */ adev->external_rev_id = adev->rev_id + 0x46; - /* GC 9.4.3 uses MMIO register region hole at a different offset */ - if (!amdgpu_sriov_vf(adev)) { - adev->rmmio_remap.reg_offset = 0x1A000; - adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000; - } break; default: /* FIXME: not supported yet */ @@ -1458,7 +1453,8 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) adev->hdp.funcs->get_clock_gating_state(adev, flags); if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && - (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) { + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); if (!(data & 0x01000000)) @@ -1501,4 +1497,6 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = { .set_clockgating_state = soc15_common_set_clockgating_state, .set_powergating_state = soc15_common_set_powergating_state, .get_clockgating_state= soc15_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 1444b7765e4b..282584a48be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -88,6 +88,8 @@ struct soc15_ras_field_entry { }; #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_STR(ip, inst, reg) \ + { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg } #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 581a3bd11481..d30ad7d56def 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ 
-72,7 +72,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = { static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -80,7 +80,7 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -389,6 +389,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 4): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -457,10 +458,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): - return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - return false; default: return true; } @@ -559,11 +558,9 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = { static int soc21_common_early_init(void *handle) { -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; - adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + adev->nbio.funcs->set_reg_remap(adev); adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &amdgpu_device_indirect_rreg; @@ -722,7 +719,10 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; - adev->external_rev_id = adev->rev_id + 0x1; + if (adev->rev_id == 0) + adev->external_rev_id = 0x1; + else + adev->external_rev_id = adev->rev_id + 0x10; break; case IP_VERSION(11, 5, 1): adev->cg_flags = @@ -753,6 +753,34 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0xc1; break; + case IP_VERSION(11, 5, 2): + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_GFX_PG; + 
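+		/* cg_flags/pg_flags are capability masks rather than switches:
+		 * the per-IP gating code later tests individual bits before
+		 * programming hardware. An illustrative consumer (the function
+		 * name below is hypothetical):
+		 *
+		 *	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+		 *		gfx_update_coarse_grain_cg(adev, enable);
+		 */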
adev->external_rev_id = adev->rev_id + 0x40; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -869,10 +897,35 @@ static int soc21_common_suspend(void *handle) return soc21_common_hw_fini(adev); } +static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && + !adev->suspend_complete) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc21_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc21_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc21_asic_reset(adev); + } + return soc21_common_hw_init(adev); } @@ -903,6 +956,7 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(7, 7, 1): case IP_VERSION(7, 11, 0): case IP_VERSION(7, 11, 1): + case IP_VERSION(7, 11, 3): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, @@ -959,4 +1013,6 @@ static const struct amd_ip_funcs soc21_common_ip_funcs = { .set_clockgating_state = soc21_common_set_clockgating_state, .set_powergating_state = soc21_common_set_powergating_state, .get_clockgating_state = soc21_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c new file mode 100644 index 000000000000..7d641d0dadba --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -0,0 +1,604 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include <linux/firmware.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "amdgpu_ih.h" +#include "amdgpu_uvd.h" +#include "amdgpu_vce.h" +#include "amdgpu_ucode.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "atom.h" +#include "amd_pcie.h" + +#include "gc/gc_12_0_0_offset.h" +#include "gc/gc_12_0_0_sh_mask.h" +#include "mp/mp_14_0_2_offset.h" + +#include "soc15.h" +#include "soc15_common.h" +#include "soc24.h" +#include "mxgpu_nv.h" + +static const struct amd_ip_funcs soc24_common_ip_funcs; + +static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_encode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_encode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_encode_array_vcn0), + .codec_array = vcn_5_0_0_video_codecs_encode_array_vcn0, +}; + +static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_decode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_decode_array_vcn0), + .codec_array = vcn_5_0_0_video_codecs_decode_array_vcn0, +}; + +static int soc24_query_video_codecs(struct amdgpu_device *adev, bool encode, + const struct amdgpu_video_codecs **codecs) +{ + if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config)) + return -EINVAL; + + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { + case IP_VERSION(5, 0, 0): + if (encode) + *codecs = &vcn_5_0_0_video_codecs_encode_vcn0; + else + *codecs = &vcn_5_0_0_video_codecs_decode_vcn0; + return 0; + default: + return -EINVAL; + } +} + +static u32 soc24_get_config_memsize(struct amdgpu_device *adev) +{ + return adev->nbio.funcs->get_memsize(adev); +} + +static u32 soc24_get_xclk(struct amdgpu_device *adev) +{ + return adev->clock.spll.reference_freq; +} + +void soc24_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid) +{ + u32 grbm_gfx_cntl = 0; + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); + + WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl); +} + +static struct soc15_allowed_register_entry soc24_allowed_read_registers[] = { + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)}, + { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)}, + { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)}, + { SOC15_REG_ENTRY(GC, 0, regCP_STAT)}, + 
{ SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)}, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)}, +}; + +static uint32_t soc24_read_indexed_register(struct amdgpu_device *adev, + u32 se_num, + u32 sh_num, + u32 reg_offset) +{ + uint32_t val; + + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); + + val = RREG32(reg_offset); + + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); + mutex_unlock(&adev->grbm_idx_mutex); + return val; +} + +static uint32_t soc24_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + if (indexed) { + return soc24_read_indexed_register(adev, se_num, sh_num, reg_offset); + } else { + if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) && + adev->gfx.config.gb_addr_config) + return adev->gfx.config.gb_addr_config; + return RREG32(reg_offset); + } +} + +static int soc24_read_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset, u32 *value) +{ + uint32_t i; + struct soc15_allowed_register_entry *en; + + *value = 0; + for (i = 0; i < ARRAY_SIZE(soc24_allowed_read_registers); i++) { + en = &soc24_allowed_read_registers[i]; + if (!adev->reg_offset[en->hwip][en->inst]) + continue; + else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + + en->reg_offset)) + continue; + + *value = soc24_get_register_value(adev, + soc24_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +} + +static enum amd_reset_method +soc24_asic_reset_method(struct amdgpu_device *adev) +{ + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, + "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + return AMD_RESET_METHOD_MODE1; + default: + if (amdgpu_dpm_is_baco_supported(adev)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; + } +} + +static int soc24_asic_reset(struct amdgpu_device *adev) +{ + int ret = 0; + + switch (soc24_asic_reset_method(adev)) { + case AMD_RESET_METHOD_PCI: + dev_info(adev->dev, "PCI reset\n"); + ret = amdgpu_device_pci_reset(adev); + break; + case AMD_RESET_METHOD_BACO: + dev_info(adev->dev, "BACO reset\n"); + ret = amdgpu_dpm_baco_reset(adev); + break; + case AMD_RESET_METHOD_MODE2: + dev_info(adev->dev, "MODE2 reset\n"); + ret = amdgpu_dpm_mode2_reset(adev); + break; + default: + dev_info(adev->dev, "MODE1 reset\n"); + ret = amdgpu_device_mode1_reset(adev); + break; + } + + return ret; +} + +static void soc24_program_aspm(struct amdgpu_device *adev) +{ + if (!amdgpu_device_should_use_aspm(adev)) + return; + + if (!(adev->flags & AMD_IS_APU) && + (adev->nbio.funcs->program_aspm)) + 
adev->nbio.funcs->program_aspm(adev); +} + +static void soc24_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); +} + +const struct amdgpu_ip_block_version soc24_common_ip_block = { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &soc24_common_ip_funcs, +}; + +static bool soc24_need_full_reset(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + default: + return true; + } +} + +static bool soc24_need_reset_on_init(struct amdgpu_device *adev) +{ + u32 sol_reg; + + if (adev->flags & AMD_IS_APU) + return false; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81); + if (sol_reg) + return true; + + return false; +} + +static uint64_t soc24_get_pcie_replay_count(struct amdgpu_device *adev) +{ + /* TODO + * dummy implement for pcie_replay_count sysfs interface + * */ + return 0; +} + +static void soc24_init_doorbell_index(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0; + adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1; + adev->doorbell_index.gfx_userqueue_start = + AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START; + adev->doorbell_index.gfx_userqueue_end = + AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END; + adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0; + adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1; + adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7; + adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP; + + adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1; + adev->doorbell_index.sdma_doorbell_range = 20; +} + +static void soc24_pre_asic_init(struct amdgpu_device *adev) +{ +} + +static int soc24_update_umd_stable_pstate(struct amdgpu_device *adev, + bool enter) +{ + if (enter) + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + else + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + + if 
(adev->gfx.funcs->update_perfmon_mgcg) + adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); + + return 0; +} + +static const struct amdgpu_asic_funcs soc24_asic_funcs = { + .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, + .read_register = &soc24_read_register, + .reset = &soc24_asic_reset, + .reset_method = &soc24_asic_reset_method, + .get_xclk = &soc24_get_xclk, + .get_config_memsize = &soc24_get_config_memsize, + .init_doorbell_index = &soc24_init_doorbell_index, + .need_full_reset = &soc24_need_full_reset, + .need_reset_on_init = &soc24_need_reset_on_init, + .get_pcie_replay_count = &soc24_get_pcie_replay_count, + .supports_baco = &amdgpu_dpm_is_baco_supported, + .pre_asic_init = &soc24_pre_asic_init, + .query_video_codecs = &soc24_query_video_codecs, + .update_umd_stable_pstate = &soc24_update_umd_stable_pstate, +}; + +static int soc24_common_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->nbio.funcs->set_reg_remap(adev); + adev->smc_rreg = NULL; + adev->smc_wreg = NULL; + adev->pcie_rreg = &amdgpu_device_indirect_rreg; + adev->pcie_wreg = &amdgpu_device_indirect_wreg; + adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; + adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; + adev->pciep_rreg = amdgpu_device_pcie_port_rreg; + adev->pciep_wreg = amdgpu_device_pcie_port_wreg; + adev->uvd_ctx_rreg = NULL; + adev->uvd_ctx_wreg = NULL; + adev->didt_rreg = NULL; + adev->didt_wreg = NULL; + + adev->asic_funcs = &soc24_asic_funcs; + + adev->rev_id = amdgpu_device_get_rev_id(adev); + adev->external_rev_id = 0xff; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 0x40; + break; + case IP_VERSION(12, 0, 1): + adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS; + + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_JPEG_DPG | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 0x50; + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev); + xgpu_nv_mailbox_set_irq_funcs(adev); + } + + return 0; +} + +static int soc24_common_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_get_irq(adev); + + return 0; +} + +static int soc24_common_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_nv_mailbox_add_irq_id(adev); + + return 0; +} + +static int soc24_common_sw_fini(void *handle) +{ + return 0; +} + +static int soc24_common_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* enable aspm */ + 
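+	/* The bring-up order below is assumed to mirror soc21: program ASPM
+	 * first, then initialize the NBIO registers and remap HDP, and open
+	 * the doorbell aperture only once that setup is done. The optional
+	 * hooks (remap_hdp_registers, df hw_init) are NULL-checked, so a
+	 * minimal sketch of the required sequence is just:
+	 *
+	 *	soc24_program_aspm(adev);
+	 *	adev->nbio.funcs->init_registers(adev);
+	 *	soc24_enable_doorbell_aperture(adev, true);
+	 */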
soc24_program_aspm(adev);
+	/* setup nbio registers */
+	adev->nbio.funcs->init_registers(adev);
+	/* remap HDP registers to a hole in mmio space,
+	 * for the purpose of exposing those registers
+	 * to process space
+	 */
+	if (adev->nbio.funcs->remap_hdp_registers)
+		adev->nbio.funcs->remap_hdp_registers(adev);
+
+	if (adev->df.funcs->hw_init)
+		adev->df.funcs->hw_init(adev);
+
+	/* enable the doorbell aperture */
+	soc24_enable_doorbell_aperture(adev, true);
+
+	return 0;
+}
+
+static int soc24_common_hw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	/* disable the doorbell aperture */
+	soc24_enable_doorbell_aperture(adev, false);
+
+	if (amdgpu_sriov_vf(adev))
+		xgpu_nv_mailbox_put_irq(adev);
+
+	return 0;
+}
+
+static int soc24_common_suspend(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	return soc24_common_hw_fini(adev);
+}
+
+static int soc24_common_resume(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	return soc24_common_hw_init(adev);
+}
+
+static bool soc24_common_is_idle(void *handle)
+{
+	return true;
+}
+
+static int soc24_common_wait_for_idle(void *handle)
+{
+	return 0;
+}
+
+static int soc24_common_soft_reset(void *handle)
+{
+	return 0;
+}
+
+static int soc24_common_set_clockgating_state(void *handle,
+					      enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+	case IP_VERSION(6, 3, 1):
+		adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+				state == AMD_CG_STATE_GATE);
+		adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+				state == AMD_CG_STATE_GATE);
+		adev->hdp.funcs->update_clock_gating(adev,
+				state == AMD_CG_STATE_GATE);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int soc24_common_set_powergating_state(void *handle,
+					      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+	case IP_VERSION(7, 0, 0):
+	case IP_VERSION(7, 0, 1):
+		adev->lsdma.funcs->update_memory_power_gating(adev,
+				state == AMD_PG_STATE_GATE);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static void soc24_common_get_clockgating_state(void *handle, u64 *flags)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+	adev->hdp.funcs->get_clock_gating_state(adev, flags);
+}
+
+static const struct amd_ip_funcs soc24_common_ip_funcs = {
+	.name = "soc24_common",
+	.early_init = soc24_common_early_init,
+	.late_init = soc24_common_late_init,
+	.sw_init = soc24_common_sw_init,
+	.sw_fini = soc24_common_sw_fini,
+	.hw_init = soc24_common_hw_init,
+	.hw_fini = soc24_common_hw_fini,
+	.suspend = soc24_common_suspend,
+	.resume = soc24_common_resume,
+	.is_idle = soc24_common_is_idle,
+	.wait_for_idle = soc24_common_wait_for_idle,
+	.soft_reset = soc24_common_soft_reset,
+	.set_clockgating_state = soc24_common_set_clockgating_state,
+	.set_powergating_state = soc24_common_set_powergating_state,
+	.get_clockgating_state = soc24_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.h b/drivers/gpu/drm/amd/amdgpu/soc24.h
new file mode 100644
index 000000000000..fa7e442e0b62
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SOC24_H__ +#define __SOC24_H__ + +extern const struct amdgpu_ip_block_version soc24_common_ip_block; + +void soc24_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 056d4df8fa1f..3ac56a9645eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -146,6 +146,7 @@ struct ta_ras_mca_addr { uint32_t ch_inst; uint32_t umc_inst; uint32_t node_inst; + uint32_t socket_id; }; struct ta_ras_phy_addr { diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 450b6e831509..24d49d813607 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -486,6 +486,8 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { .post_soft_reset = tonga_ih_post_soft_reset, .set_clockgating_state = tonga_ih_set_clockgating_state, .set_powergating_state = tonga_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs tonga_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 77af4e25ff46..9dbb13adb661 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -28,27 +28,8 @@ #include "umc/umc_12_0_0_sh_mask.h" #include "mp/mp_13_0_6_sh_mask.h" -const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM] = { - {{3, 7, 11, 15, 2, 6, 10, 14}, {1, 5, 9, 13, 0, 4, 8, 12}, - {19, 23, 27, 31, 18, 22, 26, 30}, {17, 21, 25, 29, 16, 20, 24, 28}}, - {{47, 43, 39, 35, 46, 42, 38, 34}, {45, 41, 37, 33, 44, 40, 36, 32}, - {63, 59, 55, 51, 62, 58, 54, 50}, {61, 57, 53, 49, 60, 56, 52, 48}}, - {{79, 75, 71, 67, 78, 74, 70, 66}, {77, 73, 69, 65, 76, 72, 68, 64}, - {95, 91, 87, 83, 94, 90, 86, 82}, {93, 89, 85, 81, 92, 88, 84, 80}}, - {{99, 103, 107, 111, 98, 102, 106, 110}, {97, 101, 105, 109, 96, 100, 104, 108}, - {115, 119, 123, 127, 114, 118, 122, 126}, {113, 117, 121, 125, 112, 116, 120, 124}} - }; - -/* mapping of MCA error address to normalized address */ -static const uint32_t umc_v12_0_ma2na_mapping[] = { - 0, 5, 6, 8, 9, 14, 12, 13, - 10, 11, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, - 24, 7, 29, 30, -}; +#define MAX_ECC_NUM_PER_RETIREMENT 32 
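+/* MAX_ECC_NUM_PER_RETIREMENT bounds how many ECC records a single
+ * retirement pass consumes, and the delay below is a grace period
+ * before a reset is scheduled once deferred errors have been queued.
+ * An illustrative use of the delay (the work item name is hypothetical):
+ *
+ *	schedule_delayed_work(&con->page_retirement_dwork,
+ *			      msecs_to_jiffies(DELAYED_TIME_FOR_GPU_RESET));
+ */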
+#define DELAYED_TIME_FOR_GPU_RESET 1000 //ms static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev, uint32_t node_inst, @@ -91,7 +72,7 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { - dev_info(adev->dev, + dev_dbg(adev->dev, "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n", mc_umc_status, REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val), @@ -192,99 +173,74 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static bool umc_v12_0_bit_wise_xor(uint32_t val) +static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct ta_ras_query_address_input *addr_in) { - bool result = 0; - int i; + uint32_t col, row, row_xor, bank, channel_index; + uint64_t soc_pa, retired_page, column, err_addr; + struct ta_ras_query_address_output addr_out; - for (i = 0; i < 32; i++) - result = result ^ ((val >> i) & 0x1); + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); - return result; -} + return; + } + + soc_pa = addr_out.pa.pa; + bank = addr_out.pa.bank; + channel_index = addr_out.pa.channel_idx; -static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev, - uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst, - struct ta_ras_query_address_output *addr_out) -{ - uint32_t channel_index, i; - uint64_t na, soc_pa; - uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row; - uint32_t bank0, bank1, bank2, bank3, bank; - - bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL; - bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL; - bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL; - bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL; col = (err_addr >> 1) & 0x1fULL; row = (err_addr >> 10) & 0x3fffULL; + row_xor = row ^ (0x1ULL << 13); + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - /* apply bank hash algorithm */ - bank0 = - bank_hash0 ^ (UMC_V12_0_XOR_EN0 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0)))); - bank1 = - bank_hash1 ^ (UMC_V12_0_XOR_EN1 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1)))); - bank2 = - bank_hash2 ^ (UMC_V12_0_XOR_EN2 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2)))); - bank3 = - bank_hash3 ^ (UMC_V12_0_XOR_EN3 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3)))); - - bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3); - err_addr &= ~0x3c0ULL; - err_addr |= (bank << UMC_V12_0_MCA_B0_BIT); - - na = 0x0; - /* convert mca error address to normalized address */ - for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++) - na |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i]; - - channel_index = - adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * - adev->umc.channel_inst_num + - umc_inst * adev->umc.channel_inst_num + - ch_inst]; - /* translate umc channel address to 
soc pa, 3 parts are included */ - soc_pa = ADDR_OF_32KB_BLOCK(na) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(na); - - /* the umc channel bits are not original values, they are hashed */ - UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa); - - addr_out->pa.pa = soc_pa; - addr_out->pa.bank = bank; - addr_out->pa.channel_idx = channel_index; + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + /* include column bit 0 and 1 */ + col &= 0x3; + col |= (column << 2); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row, col, bank, channel_index); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, addr_in->ma.umc_inst); + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, addr_in->ma.umc_inst); + } } -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst) +static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, + struct ta_ras_query_address_input *addr_in, + uint64_t *pfns, int len) { uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column; - struct ta_ras_query_address_input addr_in; + uint64_t soc_pa, retired_page, column, err_addr; struct ta_ras_query_address_output addr_out; + uint32_t pos = 0; - addr_in.addr_type = TA_RAS_MCA_TO_PA; - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = ch_inst; - addr_in.ma.umc_inst = umc_inst; - addr_in.ma.node_inst = node_inst; - - if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) - /* fallback to old path if fail to get pa from psp */ - umc_v12_0_mca_addr_to_pa(adev, err_addr, ch_inst, umc_inst, - node_inst, &addr_out); + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + return 0; + } soc_pa = addr_out.pa.pa; bank = addr_out.pa.bank; @@ -302,33 +258,42 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + + if (pos >= len) + return 0; + pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + /* include column bit 0 and 1 */ col &= 0x3; col |= (column << 2); dev_info(adev->dev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + + if (pos >= len) + return 0; + pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + dev_info(adev->dev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row_xor, col, bank, channel_index); - 
amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); } + + return pos; } static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) { + struct ras_err_data *err_data = (struct ras_err_data *)data; + struct ta_ras_query_address_input addr_in; uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; uint64_t mc_umc_addrt0; - struct ras_err_data *err_data = (struct ras_err_data *)data; uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); @@ -357,8 +322,19 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - umc_v12_0_convert_error_address(adev, err_data, err_addr, - ch_inst, umc_inst, node_inst); + if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id) + addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev); + else + addr_in.ma.socket_id = 0; + + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch_inst; + addr_in.ma.umc_inst = umc_inst; + addr_in.ma.node_inst = node_inst; + + umc_v12_0_convert_error_address(adev, err_data, &addr_in); } /* clear umc status */ @@ -401,64 +377,6 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, return 0; } -static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) -{ - amdgpu_mca_smu_log_ras_error(adev, - AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status); - amdgpu_mca_smu_log_ras_error(adev, - AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status); -} - -static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev, - void *ras_error_status) -{ - struct ras_err_node *err_node; - uint64_t mc_umc_status; - struct ras_err_info *err_info; - struct ras_err_addr *mca_err_addr, *tmp; - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - for_each_ras_error(err_node, err_data) { - err_info = &err_node->err_info; - if (list_empty(&err_info->err_addr_list)) - continue; - - list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) { - mc_umc_status = mca_err_addr->err_status; - if (mc_umc_status && - (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || - umc_v12_0_is_deferred_error(adev, mc_umc_status))) { - uint64_t mca_addr, err_addr, mca_ipid; - uint32_t InstanceIdLo; - - mca_addr = mca_err_addr->err_addr; - mca_ipid = mca_err_addr->err_ipid; - - err_addr = REG_GET_FIELD(mca_addr, - MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); - - dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", - mca_ipid, - err_info->mcm_info.die_id, - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - err_addr); - - umc_v12_0_convert_error_address(adev, - err_data, err_addr, - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - err_info->mcm_info.die_id); - } - - /* Delete error address node from list and free memory */ - amdgpu_ras_del_mca_err_addr(err_info, mca_err_addr); - } - } -} - static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, void *ras_error_status) { @@ -498,43 +416,49 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; -static int 
umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, - struct aca_bank_report *report, void *data) +static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; - u64 status; + struct aca_bank_info info; + enum aca_error_type err_type; + u64 status, count; + u32 ext_error_code; int ret; - ret = aca_bank_info_decode(bank, &report->info); + status = bank->regs[ACA_REG_IDX_STATUS]; + if (umc_v12_0_is_deferred_error(adev, status)) + err_type = ACA_ERROR_TYPE_DEFERRED; + else if (umc_v12_0_is_uncorrectable_error(adev, status)) + err_type = ACA_ERROR_TYPE_UE; + else if (umc_v12_0_is_correctable_error(adev, status)) + err_type = ACA_ERROR_TYPE_CE; + else + return 0; + + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - switch (type) { - case ACA_ERROR_TYPE_UE: - if (umc_v12_0_is_uncorrectable_error(adev, status)) { - report->count[type] = 1; - } - break; - case ACA_ERROR_TYPE_CE: - if (umc_v12_0_is_correctable_error(adev, status)) { - report->count[type] = 1; - } - break; - default: - return -EINVAL; - } + amdgpu_umc_update_ecc_status(adev, + bank->regs[ACA_REG_IDX_STATUS], + bank->regs[ACA_REG_IDX_IPID], + bank->regs[ACA_REG_IDX_ADDR]); - return 0; + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + count = ext_error_code == 0 ? + ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; + + return aca_error_cache_log_bank_error(handle, &info, err_type, count); } static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { - .aca_bank_generate_report = umc_v12_0_aca_bank_generate_report, + .aca_bank_parser = umc_v12_0_aca_bank_parser, }; const struct aca_info umc_v12_0_aca_info = { .hwip = ACA_HWIP_TYPE_UMC, - .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK, .bank_ops = &umc_v12_0_aca_bank_ops, }; @@ -554,6 +478,169 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common return 0; } +static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint16_t hwid, mcatype; + struct ta_ras_query_address_input addr_in; + uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; + uint64_t err_addr, hash_val = 0; + struct ras_ecc_err *ecc_err; + int count; + int ret; + + hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); + mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); + + if ((hwid != MCA_UMC_HWID_V12_0) || (mcatype != MCA_UMC_MCATYPE_V12_0)) + return 0; + + if (!status) + return 0; + + if (!umc_v12_0_is_deferred_error(adev, status)) + return 0; + + err_addr = REG_GET_FIELD(addr, + MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + dev_dbg(adev->dev, + "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n", + ipid, + MCA_IPID_2_SOCKET_ID(ipid), + MCA_IPID_2_DIE_ID(ipid), + MCA_IPID_2_UMC_INST(ipid), + MCA_IPID_2_UMC_CH(ipid), + err_addr); + + memset(page_pfn, 0, sizeof(page_pfn)); + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid); + addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid); + addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid); + addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid); + + count = umc_v12_0_convert_err_addr(adev, + &addr_in, page_pfn, 
ARRAY_SIZE(page_pfn)); + if (count <= 0) { + dev_warn(adev->dev, "Failed to convert error address! count:%d\n", count); + return 0; + } + + ret = amdgpu_umc_build_pages_hash(adev, + page_pfn, count, &hash_val); + if (ret) { + dev_err(adev->dev, "Failed to build error pages hash\n"); + return ret; + } + + ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); + if (!ecc_err) + return -ENOMEM; + + ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); + if (!ecc_err->err_pages.pfn) { + kfree(ecc_err); + return -ENOMEM; + } + + memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); + ecc_err->err_pages.count = count; + + ecc_err->hash_index = hash_val; + ecc_err->status = status; + ecc_err->ipid = ipid; + ecc_err->addr = addr; + + ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); + if (ret) { + if (ret == -EEXIST) + con->umc_ecc_log.de_queried_count++; + else + dev_err(adev->dev, "Failed to log ecc error! ret:%d\n", ret); + + kfree(ecc_err->err_pages.pfn); + kfree(ecc_err); + return ret; + } + + con->umc_ecc_log.de_queried_count++; + + /* The problem case is as follows: + * 1. GPU A triggers a gpu ras reset, and GPU A drives + * GPU B to also perform a gpu ras reset. + * 2. After GPU B's ras reset started, GPU B queried DE + * data. Since the DE data was queried in the ras reset + * thread instead of the page retirement thread, bad + * page retirement work would not be triggered. Then + * even if all gpu resets are completed, the bad pages + * will stay cached in RAM until GPU B's bad page retirement + * work is triggered again and then saved to eeprom. + * Trigger delayed work to save the bad pages to eeprom in time + * after gpu ras reset is completed. + */ + if (amdgpu_ras_in_recovery(adev)) + schedule_delayed_work(&con->page_retirement_dwork, + msecs_to_jiffies(DELAYED_TIME_FOR_GPU_RESET)); + + return 0; +} + +static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, + struct ras_ecc_err *ecc_err, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t i = 0; + int ret = 0; + + if (!err_data || !ecc_err) + return -EINVAL; + + for (i = 0; i < ecc_err->err_pages.count; i++) { + ret = amdgpu_umc_fill_error_record(err_data, + ecc_err->addr, + ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT, + MCA_IPID_2_UMC_CH(ecc_err->ipid), + MCA_IPID_2_UMC_INST(ecc_err->ipid)); + if (ret) + break; + } + + err_data->de_count++; + + return ret; +} + +static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT]; + struct radix_tree_root *ecc_tree; + int new_detected, ret, i; + + ecc_tree = &con->umc_ecc_log.de_page_tree; + + mutex_lock(&con->umc_ecc_log.lock); + new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries, + 0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG); + for (i = 0; i < new_detected; i++) { + if (!entries[i]) + continue; + + ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status); + if (ret) { + dev_err(adev->dev, "Failed to fill umc error record, ret:%d\n", ret); + break; + } + radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG); + } + mutex_unlock(&con->umc_ecc_log.lock); +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, @@ -561,8 +648,8 @@ struct amdgpu_umc_ras umc_v12_0_ras = { }, .err_cnt_init = 
umc_v12_0_err_cnt_init, .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, - .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, - .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, + .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr, .check_ecc_err_status = umc_v12_0_check_ecc_err_status, + .update_ecc_status = umc_v12_0_update_ecc_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 5973bfb14fce..b4974793850b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,83 +55,38 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) -/* bank bits in MCA error address */ -#define UMC_V12_0_MCA_B0_BIT 6 -#define UMC_V12_0_MCA_B1_BIT 7 -#define UMC_V12_0_MCA_B2_BIT 8 -#define UMC_V12_0_MCA_B3_BIT 9 + /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ #define UMC_V12_0_PA_R13_BIT 35 -/* channel index bits in SOC physical address */ -#define UMC_V12_0_PA_CH4_BIT 12 -#define UMC_V12_0_PA_CH5_BIT 13 -#define UMC_V12_0_PA_CH6_BIT 14 - -/* bank hash settings */ -#define UMC_V12_0_XOR_EN0 1 -#define UMC_V12_0_XOR_EN1 1 -#define UMC_V12_0_XOR_EN2 1 -#define UMC_V12_0_XOR_EN3 1 -#define UMC_V12_0_COL_XOR0 0x0 -#define UMC_V12_0_COL_XOR1 0x0 -#define UMC_V12_0_COL_XOR2 0x800 -#define UMC_V12_0_COL_XOR3 0x1000 -#define UMC_V12_0_ROW_XOR0 0x11111 -#define UMC_V12_0_ROW_XOR1 0x22222 -#define UMC_V12_0_ROW_XOR2 0x4444 -#define UMC_V12_0_ROW_XOR3 0x8888 - -/* channel hash settings */ -#define UMC_V12_0_HASH_4K 0 -#define UMC_V12_0_HASH_64K 1 -#define UMC_V12_0_HASH_2M 1 -#define UMC_V12_0_HASH_1G 1 -#define UMC_V12_0_HASH_1T 1 - -/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA), - * hash bit is only effective when related setting is enabled - */ -#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \ - (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \ - (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \ - (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \ - (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_4K)) -#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \ - (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ - } while (0) + +#define MCA_UMC_HWID_V12_0 0x96 +#define MCA_UMC_MCATYPE_V12_0 0x0 #define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \ (((_ipid_lo) >> 12) & 0xF)) #define MCA_IPID_LO_2_UMC_INST(_ipid_lo) 
(((_ipid_lo) >> 21) & 0x7) +#define MCA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03) + +#define MCA_IPID_2_UMC_CH(ipid) \ + (MCA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define MCA_IPID_2_UMC_INST(ipid) \ + (MCA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define MCA_IPID_2_SOCKET_ID(ipid) \ + (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ + (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) + bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status); -extern const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM]; - extern struct amdgpu_umc_ras umc_v12_0_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index c4c77257710c..a32f87992f20 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -442,11 +442,6 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *ade umc_v8_10_ecc_info_query_error_address, ras_error_status); } -static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr) -{ - hdr->version = RAS_TABLE_VER_V2_1; -} - const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { .query_ras_error_count = umc_v8_10_query_ras_error_count, .query_ras_error_address = umc_v8_10_query_ras_error_address, @@ -460,5 +455,4 @@ struct amdgpu_umc_ras umc_v8_10_ras = { .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address, - .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 8e7b763cfdb7..ce3bb12e3572 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include <linux/module.h> +#include <linux/debugfs.h> #include "amdgpu.h" #include "soc15_common.h" #include "soc21.h" @@ -60,7 +61,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -115,9 +116,8 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) upper_32_bits(adev->umsch_mm.data_start_addr)); WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_LO, - lower_32_bits(adev->umsch_mm.data_size - 1)); - WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_HI, - upper_32_bits(adev->umsch_mm.data_size - 1)); + adev->umsch_mm.data_size - 1); + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_HI, 0); data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
0 : adev->umsch_mm.data_fw_gpu_addr; @@ -143,6 +143,11 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, 0); WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, 0); +#if defined(CONFIG_DEBUG_FS) + WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, lower_32_bits(umsch->log_gpu_addr)); + WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, upper_32_bits(umsch->log_gpu_addr)); +#endif + WREG32_SOC15_UMSCH(regVCN_MES_GP1_LO, 0); WREG32_SOC15_UMSCH(regVCN_MES_GP1_HI, 0); @@ -225,6 +230,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch) WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size); + ring->wptr = 0; + data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE); data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK); WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data); @@ -248,7 +255,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -271,6 +278,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -346,6 +355,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 
1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index a6006f231c65..805d6662c88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -819,6 +819,8 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = { .soft_reset = uvd_v3_1_soft_reset, .set_clockgating_state = uvd_v3_1_set_clockgating_state, .set_powergating_state = uvd_v3_1_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version uvd_v3_1_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 1aa09ad7bbe3..3f19c606f4de 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -769,6 +769,8 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { .soft_reset = uvd_v4_2_soft_reset, .set_clockgating_state = uvd_v4_2_set_clockgating_state, .set_powergating_state = uvd_v4_2_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index f8b229b75435..efd903c21d48 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -877,6 +877,8 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { .set_clockgating_state = uvd_v5_0_set_clockgating_state, .set_powergating_state = uvd_v5_0_set_powergating_state, .get_clockgating_state = uvd_v5_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index a9a6880f44e3..495de5068455 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1545,6 +1545,8 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { .set_clockgating_state = uvd_v6_0_set_clockgating_state, .set_powergating_state = uvd_v6_0_set_powergating_state, .get_clockgating_state = uvd_v6_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index a08e7abca423..66fada199bda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -626,6 +626,8 @@ static const struct amd_ip_funcs vce_v2_0_ip_funcs = { .soft_reset = vce_v2_0_soft_reset, .set_clockgating_state = vce_v2_0_set_clockgating_state, .set_powergating_state = vce_v2_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index f4760748d349..32517c364cf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -913,6 +913,8 @@ static const struct amd_ip_funcs vce_v3_0_ip_funcs = { .set_clockgating_state = vce_v3_0_set_clockgating_state, .set_powergating_state = vce_v3_0_set_powergating_state, .get_clockgating_state = vce_v3_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct 
amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index aaceecd558cf..a280b9fecb77 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -202,24 +202,17 @@ static int vcn_v1_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; } ring = adev->jpeg.inst->ring_dec; r = amdgpu_ring_test_helper(ring); - if (r) - goto done; - -done: - if (!r) - DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); return r; } @@ -1902,6 +1895,8 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; /* @@ -2041,7 +2036,6 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; - DRM_INFO("VCN decode is enabled in VM mode\n"); } static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) @@ -2050,8 +2044,6 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; - - DRM_INFO("VCN encode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e357d8cf0c01..d3d096909a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -237,7 +237,7 @@ static int vcn_v2_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; //Disable vcn decode for sriov if (amdgpu_sriov_vf(adev)) @@ -247,15 +247,10 @@ static int vcn_v2_0_hw_init(void *handle) ring = &adev->vcn.inst->ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; } -done: - if (!r) - DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); - - return r; + return 0; } /** @@ -2008,6 +2003,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { @@ -2072,7 +2069,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; - DRM_INFO("VCN decode is enabled in VM mode\n"); } static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) @@ -2081,8 +2077,6 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; - - DRM_INFO("VCN encode is enabled in VM mode\n"); } static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { diff 
--git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 1cd8a94b0fbc..96f60c303161 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -314,22 +314,17 @@ static int vcn_v2_5_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.inst[j].ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; } } } -done: - if (!r) - DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); - return r; } @@ -1710,7 +1705,6 @@ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) continue; adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; - DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); } } @@ -1725,7 +1719,6 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; adev->vcn.inst[j].ring_enc[i].me = j; } - DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); } } @@ -1901,6 +1894,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -1921,6 +1916,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 8f82fb887e9c..24f947751c46 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -303,7 +303,7 @@ static int vcn_v3_0_hw_init(void *handle) if (amdgpu_sriov_vf(adev)) { r = vcn_v3_0_start_sriov(adev); if (r) - goto done; + return r; /* initialize VCN dec and enc ring buffers */ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -348,23 +348,18 @@ static int vcn_v3_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; for (j = 0; j < adev->vcn.num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; } } } -done: - if (!r) - DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); - - return r; + return 0; } /** @@ -2040,8 +2035,6 @@ static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) else adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; - DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i, - DEC_SW_RING_ENABLED?"(Software Ring)":""); } } @@ -2057,8 +2050,6 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs; adev->vcn.inst[i].ring_enc[j].me = i; } - if (adev->vcn.num_enc_rings > 0) - DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); } } @@ -2230,6 +2221,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, 
.set_powergating_state = vcn_v3_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 832d15f7b5f6..776c539bfdda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -258,7 +258,7 @@ static int vcn_v4_0_hw_init(void *handle) if (amdgpu_sriov_vf(adev)) { r = vcn_v4_0_start_sriov(adev); if (r) - goto done; + return r; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -269,7 +269,6 @@ static int vcn_v4_0_hw_init(void *handle) ring->wptr_old = 0; vcn_v4_0_unified_ring_set_wptr(ring); ring->sched.ready = true; - } } else { for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -283,17 +282,11 @@ static int vcn_v4_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; - + return r; } } -done: - if (!r) - DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); - - return r; + return 0; } /** @@ -1052,6 +1045,9 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1505,6 +1501,9 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; @@ -1899,8 +1898,6 @@ static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].ring_enc[0].funcs = (const struct amdgpu_ring_funcs *)&vcn_v4_0_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; - - DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); } } @@ -2130,6 +2127,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 203fa988322b..9bae95538b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define NORMALIZE_VCN_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); @@ -210,7 +213,7 @@ static int vcn_v4_0_3_hw_init(void *handle) if (amdgpu_sriov_vf(adev)) { r = vcn_v4_0_3_start_sriov(adev); if (r) - goto done; + return r; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ring = &adev->vcn.inst[i].ring_enc[0]; @@ -246,15 +249,10 @@ static int vcn_v4_0_3_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; } } -done: - if (!r) - DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); - 
return r; } @@ -1380,6 +1378,50 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg writes */ + vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, + lower_32_bits(pd_addr), 0xffffffff); +} + +static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* VCN engine access for HDP flush doesn't work when RRMT is enabled. + * This is a workaround to avoid any HDP flush through VCN ring. + */ +} + /** * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer * @@ -1419,7 +1461,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -1427,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; @@ -1450,7 +1493,6 @@ static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid; } - DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n"); } /** @@ -1660,6 +1702,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 501e53e69f2a..8d75061f9f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -234,15 +234,10 @@ static int vcn_v4_0_5_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; } -done: - if (!r) - DRM_INFO("VCN 
decode and encode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); - - return r; + return 0; } /** @@ -963,6 +958,9 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1167,6 +1165,9 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev) int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; @@ -1557,8 +1558,6 @@ static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_5_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; - - DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); } } @@ -1752,6 +1751,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index bc60c554eb32..68c97fcd539b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -95,7 +95,7 @@ static int vcn_v5_0_0_sw_init(void *handle) return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -154,7 +154,7 @@ static int vcn_v5_0_0_sw_fini(void *handle) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -200,15 +200,10 @@ static int vcn_v5_0_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) - goto done; + return r; } -done: - if (!r) - DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", - (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); - - return r; + return 0; } /** @@ -228,8 +223,13 @@ static int vcn_v5_0_0_hw_fini(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - - amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, regUVD_STATUS))) { + vcn_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } } return 0; @@ -334,7 +334,7 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0, - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared))); } /** @@ -438,7 +438,7 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); 
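/* Illustrative sizing note, assuming the usual AMDGPU_GPU_PAGE_SIZE of 4096:
 * AMDGPU_GPU_PAGE_ALIGN(x) then evaluates to ((x) + 4095) & ~4095ULL, so the
 * NONCACHE_SIZE0 value programmed below is sizeof(struct amdgpu_vcn5_fw_shared)
 * rounded up to a whole GPU page; the vcn4 -> vcn5 struct swap changes the
 * programmed size only if the new struct crosses a page boundary.
 */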
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0), - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect); /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( @@ -615,7 +615,7 @@ static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst) */ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; @@ -712,7 +712,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b */ static int vcn_v5_0_0_start(struct amdgpu_device *adev) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int i, j, k, r; @@ -721,6 +721,9 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -893,11 +896,14 @@ static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) */ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; uint32_t tmp; int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; @@ -1116,8 +1122,6 @@ static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_0_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; - - DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); } } @@ -1226,22 +1230,6 @@ static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_s } /** - * vcn_v5_0_0_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v5_0_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - -/** * vcn_v5_0_0_process_interrupt - process VCN block interrupt * * @adev: amdgpu_device pointer @@ -1286,7 +1274,6 @@ static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgp } static const struct amdgpu_irq_src_funcs vcn_v5_0_0_irq_funcs = { - .set = vcn_v5_0_0_set_interrupt_state, .process = vcn_v5_0_0_process_interrupt, }; @@ -1328,6 +1315,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index b9e785846637..ac439f0565e3 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -306,7 +306,8 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) * driver needs to program it properly according to * MC_SPACE type in IH_RB_CNTL */ if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || - (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { + (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2)) || + (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5))) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); if (adev->irq.ih.use_bus_addr) { ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, @@ -337,7 +338,8 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) /* Enable IH Retry CAM */ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) || - amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2)) + amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5)) WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, ENABLE, 1); else @@ -562,7 +564,8 @@ static int vega20_ih_sw_init(void *handle) adev->irq.ih1.use_doorbell = true; adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; - if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 2)) { + if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 2) && + amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 5)) { r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 1a98812981f4..d39c670f6220 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -897,7 +897,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool vi_asic_supports_baco(struct amdgpu_device *adev) +static int vi_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: @@ -908,14 +908,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_TOPAZ: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } static enum amd_reset_method vi_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; + int baco_reset; if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || amdgpu_reset_method == AMD_RESET_METHOD_BACO) @@ -935,7 +935,7 @@ vi_asic_reset_method(struct amdgpu_device *adev) baco_reset = amdgpu_dpm_is_baco_supported(adev); break; default: - baco_reset = false; + baco_reset = 0; break; } @@ -2058,6 +2058,8 @@ static const struct amd_ip_funcs vi_common_ip_funcs = { .set_clockgating_state = vi_common_set_clockgating_state, .set_powergating_state = vi_common_set_powergating_state, .get_clockgating_state = vi_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version vi_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index 769eb8f7bb3c..45876883bbf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -34,6 +34,7 @@ MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin"); MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin"); +MODULE_FIRMWARE("amdgpu/vpe_6_1_3.bin"); #define VPE_THREAD1_UCODE_OFFSET 0x8000 @@ -144,6 +145,12 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); } + /* setup 
collaborate mode */ + vpe_v6_1_set_collaborate_mode(vpe, true); + /* setup DPM */ + if (amdgpu_vpe_configure_dpm(vpe)) + dev_warn(adev->dev, "VPE failed to enable DPM\n"); + /* * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. * Here use instance 0 as master. */ @@ -159,11 +166,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; - amdgpu_vpe_psp_update_sram(adev); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); - - return 0; + return amdgpu_vpe_psp_update_sram(adev); } vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; @@ -196,8 +199,6 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) } vpe_v6_1_halt(vpe, false); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); return 0; }
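A note on the umc_v12_0 retirement fan-out: umc_v12_0_convert_err_addr() and umc_v12_0_convert_error_address() both expand one translated physical address into every page that may be affected, looping over the UMC_V12_0_NA_MAP_PA_NUM (8) combinations of the C2..C4 column bits and retiring the R13-flipped sibling row as well, which is exactly the 16 pages that UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL budgets for per channel. A minimal userspace sketch of that loop, assuming AMDGPU_GPU_PAGE_SHIFT == 12 and using the bit positions from umc_v12_0.h (expand_retired_pfns is a hypothetical helper name, not driver API):

    #include <stdint.h>
    #include <stdio.h>

    #define PA_C2_BIT      15  /* UMC_V12_0_PA_C2_BIT */
    #define PA_C4_BIT      21  /* UMC_V12_0_PA_C4_BIT */
    #define PA_R13_BIT     35  /* UMC_V12_0_PA_R13_BIT */
    #define NA_MAP_PA_NUM  8   /* UMC_V12_0_NA_MAP_PA_NUM */
    #define GPU_PAGE_SHIFT 12  /* assumed AMDGPU_GPU_PAGE_SHIFT */

    /* Expand one translated soc_pa into each page the driver would retire. */
    static int expand_retired_pfns(uint64_t soc_pa, uint64_t *pfns, int len)
    {
        uint64_t column, page;
        int pos = 0;

        /* clear [C4 C3 C2] so the loop can try all combinations */
        soc_pa &= ~(0x3ULL << PA_C2_BIT);
        soc_pa &= ~(0x1ULL << PA_C4_BIT);

        for (column = 0; column < NA_MAP_PA_NUM; column++) {
            page = soc_pa | ((column & 0x3) << PA_C2_BIT);
            page |= ((column & 0x4) >> 2) << PA_C4_BIT;
            if (pos >= len)
                break;
            pfns[pos++] = page >> GPU_PAGE_SHIFT;

            /* the R13-flipped sibling row is retired as well */
            page ^= 0x1ULL << PA_R13_BIT;
            if (pos >= len)
                break;
            pfns[pos++] = page >> GPU_PAGE_SHIFT;
        }

        return pos;
    }

    int main(void)
    {
        uint64_t pfns[16];
        int i, n = expand_retired_pfns(0x123456789ULL, pfns, 16);

        for (i = 0; i < n; i++)
            printf("candidate pfn: 0x%llx\n", (unsigned long long)pfns[i]);
        return 0;
    }

Compiled standalone, this prints the 16 candidate page frame numbers derived from one reported address, mirroring what the new bounded-output umc_v12_0_convert_err_addr() writes into its pfns[] array.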
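The MCA_IPID_2_* macros added to umc_v12_0.h encode how the UMC topology (socket, die, UMC instance, channel) is recovered from an MCA IPID register. A standalone sketch of the same bit manipulation, operating on InstanceIdLo/InstanceIdHi values assumed to have already been extracted with REG_GET_FIELD in the driver; the helper names and sample inputs below are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* mirrors MCA_IPID_LO_2_UMC_CH */
    static uint32_t umc_ch(uint32_t id_lo)
    {
        return (((id_lo >> 20) & 0x1) * 4) + ((id_lo >> 12) & 0xF);
    }

    /* mirrors MCA_IPID_LO_2_UMC_INST */
    static uint32_t umc_inst(uint32_t id_lo)
    {
        return (id_lo >> 21) & 0x7;
    }

    /* mirrors MCA_IPID_2_DIE_ID: bits [3:2] of InstanceIdHi */
    static uint32_t die_id(uint32_t id_hi)
    {
        return (id_hi >> 2) & 0x3;
    }

    /* mirrors MCA_IPID_2_SOCKET_ID: bit 0 of InstanceIdLo becomes bit 2,
     * the low two bits come from InstanceIdHi */
    static uint32_t socket_id(uint32_t id_lo, uint32_t id_hi)
    {
        return ((id_lo & 0x1) << 2) | (id_hi & 0x3);
    }

    int main(void)
    {
        uint32_t id_lo = 0x00321000, id_hi = 0x7; /* made-up sample values */

        printf("socket %u, die %u, umc %u, ch %u\n",
               socket_id(id_lo, id_hi), die_id(id_hi),
               umc_inst(id_lo), umc_ch(id_lo));
        return 0;
    }

This is the decode that umc_v12_0_update_ecc_status() relies on when it fills ta_ras_query_address_input directly from the raw IPID instead of taking per-channel loop indices.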
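On the vi.c change of vi_asic_supports_baco() from bool to int: amdgpu_dpm_is_baco_supported() can return a negative error code, and a bool return folds that failure into a truth value, while an int lets the caller keep the three cases apart. A hedged model of the resulting contract; the function names below are stand-ins, not the driver's API:

    #include <stdio.h>

    /* stand-in for amdgpu_dpm_is_baco_supported(): <0 error, 0 no, 1 yes */
    static int dpm_is_baco_supported(int asic_ok)
    {
        return asic_ok ? 1 : -22; /* -EINVAL, illustrative */
    }

    /* returning int instead of bool preserves the error code */
    static int supports_baco(int asic_ok)
    {
        return dpm_is_baco_supported(asic_ok);
    }

    int main(void)
    {
        int baco = supports_baco(0);

        if (baco < 0)
            printf("query failed (%d), fall back to another reset method\n", baco);
        else
            printf("BACO %ssupported\n", baco ? "" : "not ");
        return 0;
    }

The same pattern explains why vi_asic_reset_method() now declares baco_reset as int rather than bool.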
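The SR-IOV helpers added to vcn_v4_0_3.c route every emitted register offset through NORMALIZE_VCN_REG_OFFSET, which keeps only the low 17 bits so a VF always emits the local instance's offset rather than an absolute one. The macro in isolation, with a sample value taken from the file's own VCN1_VID_SOC_ADDRESS_3_0 definition:

    #include <stdint.h>
    #include <stdio.h>

    #define NORMALIZE_VCN_REG_OFFSET(offset) ((offset) & 0x1FFFF)

    int main(void)
    {
        uint32_t reg = 0x48300; /* VCN1_VID_SOC_ADDRESS_3_0 */

        printf("0x%x -> 0x%x\n", reg, NORMALIZE_VCN_REG_OFFSET(reg));
        return 0;
    }

This prints 0x48300 -> 0x8300, the local-aperture offset that vcn_v4_0_3_enc_ring_emit_wreg() and vcn_v4_0_3_enc_ring_emit_reg_wait() write into the ring when amdgpu_sriov_vf() is true.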