Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
204 files changed, 12399 insertions, 5847 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index ee85e8aba636..8d0748184a14 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -51,12 +51,16 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ + amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ + amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \ + amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ - amdgpu_fw_attestation.o amdgpu_securedisplay.o + amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o \ + amdgpu_eeprom.o amdgpu_mca.o + +amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -71,7 +75,8 @@ amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ - nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o + nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o \ + beige_goby_reg_init.o yellow_carp_reg_init.o cyan_skillfish_reg_init.o # add DF block amdgpu-y += \ @@ -107,6 +112,7 @@ amdgpu-y += \ psp_v3_1.o \ psp_v10_0.o \ psp_v11_0.o \ + psp_v11_0_8.o \ psp_v12_0.o \ psp_v13_0.o @@ -114,7 +120,7 @@ amdgpu-y += \ amdgpu-y += \ dce_v10_0.o \ dce_v11_0.o \ - dce_virtual.o + amdgpu_vkms.o # add GFX block amdgpu-y += \ @@ -183,6 +189,10 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_reset.o +# add MCA block +amdgpu-y += \ + mca_v3_0.o + # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 65b1dca4b02e..148f6c3343ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -227,7 +227,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) break; default: break; - }; + } } /* Reinit NBIF block */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 264176a01e16..dc3c6b3a00e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,6 +107,8 @@ #include "amdgpu_gfxhub.h" #include "amdgpu_df.h" #include "amdgpu_smuio.h" +#include "amdgpu_fdinfo.h" +#include "amdgpu_mca.h" #define MAX_GPU_INSTANCE 16 @@ -129,6 +131,13 @@ struct amdgpu_mgpu_info bool pending_reset; }; +enum amdgpu_ss { + AMDGPU_SS_DRV_LOAD, + AMDGPU_SS_DEV_D0, + AMDGPU_SS_DEV_D3, + AMDGPU_SS_DRV_UNLOAD +}; + struct amdgpu_watchdog_timer { bool timeout_fatal_disable; @@ -203,6 +212,7 @@ extern int amdgpu_discovery; extern int amdgpu_mes; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; +extern int amdgpu_smartshift_bias; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ 
-260,6 +270,10 @@ extern int amdgpu_num_kcq; #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 +/* smasrt shift bias level limits */ +#define AMDGPU_SMARTSHIFT_MAX_BIAS (100) +#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) + struct amdgpu_device; struct amdgpu_ib; struct amdgpu_cs_parser; @@ -267,7 +281,6 @@ struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; -struct amdgpu_atif; struct kfd_vm_fault_info; struct amdgpu_hive_info; struct amdgpu_reset_context; @@ -607,6 +620,13 @@ struct amdgpu_video_codec_info { u32 max_level; }; +#define codec_info_build(type, width, height, level) \ + .codec_type = type,\ + .max_width = width,\ + .max_height = height,\ + .max_pixels_per_frame = height * width,\ + .max_level = level, + struct amdgpu_video_codecs { const u32 codec_count; const struct amdgpu_video_codec_info *codec_array; @@ -682,20 +702,6 @@ struct amdgpu_vram_scratch { }; /* - * ACPI - */ -struct amdgpu_atcs_functions { - bool get_ext_state; - bool pcie_perf_req; - bool pcie_dev_rdy; - bool pcie_bus_width; -}; - -struct amdgpu_atcs { - struct amdgpu_atcs_functions functions; -}; - -/* * CGS */ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); @@ -824,8 +830,6 @@ struct amdgpu_device { struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct debugfs_blob_wrapper debugfs_vbios_blob; - struct amdgpu_atif *atif; - struct amdgpu_atcs atcs; struct mutex srbm_mutex; /* GRBM index mutex. Protects concurrent access to GRBM index */ struct mutex grbm_idx_mutex; @@ -913,6 +917,7 @@ struct amdgpu_device { /* display */ bool enable_virtual_display; + struct amdgpu_vkms_output *amdgpu_vkms_output; struct amdgpu_mode_info mode_info; /* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */ struct work_struct hotplug_work; @@ -1005,6 +1010,9 @@ struct amdgpu_device { /* df */ struct amdgpu_df df; + /* MCA */ + struct amdgpu_mca mca; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; uint32_t harvest_ip_mask; int num_ip_blocks; @@ -1074,9 +1082,10 @@ struct amdgpu_device { atomic_t throttling_logging_enabled; struct ratelimit_state throttling_logging_rs; - uint32_t ras_features; + uint32_t ras_hw_enabled; + uint32_t ras_enabled; - bool in_pci_err_recovery; + bool no_hw_access; struct pci_saved_state *pci_state; struct amdgpu_reset_control *reset_cntl; @@ -1099,11 +1108,18 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev) int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags); -void amdgpu_device_fini(struct amdgpu_device *adev); +void amdgpu_device_fini_hw(struct amdgpu_device *adev); +void amdgpu_device_fini_sw(struct amdgpu_device *adev); + int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); +void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write); +size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write); + void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, - uint32_t *buf, size_t size, bool write); + void *buf, size_t size, bool write); uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_device_wreg(struct amdgpu_device *adev, @@ -1142,6 +1158,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); * Registers read & write functions. 
*/ #define AMDGPU_REGS_NO_KIQ (1<<1) +#define AMDGPU_REGS_RLC (1<<2) #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) @@ -1258,6 +1275,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) + /* Common functions */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1278,12 +1297,18 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev); bool amdgpu_device_supports_atpx(struct drm_device *dev); bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); +bool amdgpu_device_supports_smart_shift(struct drm_device *dev); bool amdgpu_device_supports_baco(struct drm_device *dev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); int amdgpu_device_baco_enter(struct drm_device *dev); int amdgpu_device_baco_exit(struct drm_device *dev); +void amdgpu_device_flush_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring); +void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring); + /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) void amdgpu_register_atpx_handler(void); @@ -1319,6 +1344,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev); int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv); void amdgpu_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv); +void amdgpu_driver_release_kms(struct drm_device *dev); + int amdgpu_device_ip_suspend(struct amdgpu_device *adev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); @@ -1350,21 +1377,38 @@ struct amdgpu_afmt_acr { struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock); /* amdgpu_acpi.c */ + +/* ATCS Device/Driver State */ +#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0 +#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3 +#define AMDGPU_ATCS_PSC_DRV_STATE_OPR 0 +#define AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR 1 + #if defined(CONFIG_ACPI) int amdgpu_acpi_init(struct amdgpu_device *adev); void amdgpu_acpi_fini(struct amdgpu_device *adev); bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev); +bool amdgpu_acpi_is_power_shift_control_supported(void); int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise); +int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, + u8 dev_state, bool drv_state); +int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); -void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, - struct amdgpu_dm_backlight_caps *caps); -bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev); +void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); +bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); +void amdgpu_acpi_detect(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } -static inline bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { return false; } +static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { 
return false; } +static inline void amdgpu_acpi_detect(void) { } +static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } +static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, + u8 dev_state, bool drv_state) { return 0; } +static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, + enum amdgpu_ss ss_state) { return 0; } #endif int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index b8655ff73a65..cc9c9f8b23b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -160,17 +160,28 @@ static int acp_poweron(struct generic_pm_domain *genpd) return 0; } -static struct device *get_mfd_cell_dev(const char *device_name, int r) +static int acp_genpd_add_device(struct device *dev, void *data) { - char auto_dev_name[25]; - struct device *dev; + struct generic_pm_domain *gpd = data; + int ret; - snprintf(auto_dev_name, sizeof(auto_dev_name), - "%s.%d.auto", device_name, r); - dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name); - dev_info(dev, "device %s added to pm domain\n", auto_dev_name); + ret = pm_genpd_add_device(gpd, dev); + if (ret) + dev_err(dev, "Failed to add dev to genpd %d\n", ret); - return dev; + return ret; +} + +static int acp_genpd_remove_device(struct device *dev, void *data) +{ + int ret; + + ret = pm_genpd_remove_device(dev); + if (ret) + dev_err(dev, "Failed to remove dev from genpd %d\n", ret); + + /* Continue to remove */ + return 0; } /** @@ -181,11 +192,10 @@ static struct device *get_mfd_cell_dev(const char *device_name, int r) */ static int acp_hw_init(void *handle) { - int r, i; + int r; uint64_t acp_base; u32 val = 0; u32 count = 0; - struct device *dev; struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -341,15 +351,10 @@ static int acp_hw_init(void *handle) if (r) goto failure; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); - if (r) { - dev_err(dev, "Failed to add dev to genpd\n"); - goto failure; - } - } - + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -410,10 +415,8 @@ failure: */ static int acp_hw_fini(void *handle) { - int i, ret; u32 val = 0; u32 count = 0; - struct device *dev; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* return early if no ACP */ @@ -458,13 +461,8 @@ static int acp_hw_fini(void *handle) udelay(100); } - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(dev); - /* If removal fails, dont giveup and try rest */ - if (ret) - dev_err(dev, "remove dev from genpd failed\n"); - } + device_for_each_child(adev->acp.parent, NULL, + acp_genpd_remove_device); mfd_remove_devices(adev->acp.parent); kfree(adev->acp.acp_res); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 2e9b16fb3fcd..4811b0faafd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/power_supply.h> #include <linux/pm_runtime.h> +#include <linux/suspend.h> #include <acpi/video.h> 
#include <acpi/actbl.h> @@ -71,12 +72,31 @@ struct amdgpu_atif { struct amdgpu_dm_backlight_caps backlight_caps; }; +struct amdgpu_atcs_functions { + bool get_ext_state; + bool pcie_perf_req; + bool pcie_dev_rdy; + bool pcie_bus_width; + bool power_shift_control; +}; + +struct amdgpu_atcs { + acpi_handle handle; + + struct amdgpu_atcs_functions functions; +}; + +static struct amdgpu_acpi_priv { + struct amdgpu_atif atif; + struct amdgpu_atcs atcs; +} amdgpu_acpi_priv; + /* Call the ATIF method */ /** * amdgpu_atif_call - call an ATIF method * - * @handle: acpi handle + * @atif: atif structure * @function: the ATIF function to execute * @params: ATIF function params * @@ -166,7 +186,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas /** * amdgpu_atif_verify_interface - verify ATIF * - * @handle: acpi handle * @atif: amdgpu atif struct * * Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function @@ -208,40 +227,10 @@ out: return err; } -static acpi_handle amdgpu_atif_probe_handle(acpi_handle dhandle) -{ - acpi_handle handle = NULL; - char acpi_method_name[255] = { 0 }; - struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name }; - acpi_status status; - - /* For PX/HG systems, ATIF and ATPX are in the iGPU's namespace, on dGPU only - * systems, ATIF is in the dGPU's namespace. - */ - status = acpi_get_handle(dhandle, "ATIF", &handle); - if (ACPI_SUCCESS(status)) - goto out; - - if (amdgpu_has_atpx()) { - status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATIF", - &handle); - if (ACPI_SUCCESS(status)) - goto out; - } - - DRM_DEBUG_DRIVER("No ATIF handle found\n"); - return NULL; -out: - acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); - DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name); - return handle; -} - /** * amdgpu_atif_get_notification_params - determine notify configuration * - * @handle: acpi handle - * @n: atif notification configuration struct + * @atif: acpi handle * * Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function * to determine if a notifier is used and if so which one @@ -304,7 +293,7 @@ out: /** * amdgpu_atif_query_backlight_caps - get min and max backlight input signal * - * @handle: acpi handle + * @atif: acpi handle * * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function * to determine the acceptable range of backlight values @@ -363,7 +352,7 @@ out: /** * amdgpu_atif_get_sbios_requests - get requested sbios event * - * @handle: acpi handle + * @atif: acpi handle * @req: atif sbios request struct * * Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function @@ -416,7 +405,7 @@ out: static int amdgpu_atif_handler(struct amdgpu_device *adev, struct acpi_bus_event *event) { - struct amdgpu_atif *atif = adev->atif; + struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; int count; DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n", @@ -426,8 +415,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, return NOTIFY_DONE; /* Is this actually our event? 
*/ - if (!atif || - !atif->notification_cfg.enabled || + if (!atif->notification_cfg.enabled || event->type != atif->notification_cfg.command_code) { /* These events will generate keypresses otherwise */ if (event->type == ACPI_VIDEO_NOTIFY_PROBE) @@ -487,14 +475,15 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, /** * amdgpu_atcs_call - call an ATCS method * - * @handle: acpi handle + * @atcs: atcs structure * @function: the ATCS function to execute * @params: ATCS function params * * Executes the requested ATCS function (all asics). * Returns a pointer to the acpi output buffer. */ -static union acpi_object *amdgpu_atcs_call(acpi_handle handle, int function, +static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs, + int function, struct acpi_buffer *params) { acpi_status status; @@ -518,7 +507,7 @@ static union acpi_object *amdgpu_atcs_call(acpi_handle handle, int function, atcs_arg_elements[1].integer.value = 0; } - status = acpi_evaluate_object(handle, "ATCS", &atcs_arg, &buffer); + status = acpi_evaluate_object(atcs->handle, NULL, &atcs_arg, &buffer); /* Fail only if calling the method fails and ATIF is supported */ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { @@ -547,12 +536,12 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas f->pcie_perf_req = mask & ATCS_PCIE_PERFORMANCE_REQUEST_SUPPORTED; f->pcie_dev_rdy = mask & ATCS_PCIE_DEVICE_READY_NOTIFICATION_SUPPORTED; f->pcie_bus_width = mask & ATCS_SET_PCIE_BUS_WIDTH_SUPPORTED; + f->power_shift_control = mask & ATCS_SET_POWER_SHIFT_CONTROL_SUPPORTED; } /** * amdgpu_atcs_verify_interface - verify ATCS * - * @handle: acpi handle * @atcs: amdgpu atcs struct * * Execute the ATCS_FUNCTION_VERIFY_INTERFACE ATCS function @@ -560,15 +549,14 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas * (all asics). * returns 0 on success, error on failure. */ -static int amdgpu_atcs_verify_interface(acpi_handle handle, - struct amdgpu_atcs *atcs) +static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs) { union acpi_object *info; struct atcs_verify_interface output; size_t size; int err = 0; - info = amdgpu_atcs_call(handle, ATCS_FUNCTION_VERIFY_INTERFACE, NULL); + info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_VERIFY_INTERFACE, NULL); if (!info) return -EIO; @@ -605,7 +593,7 @@ out: */ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev) { - struct amdgpu_atcs *atcs = &adev->atcs; + struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs; if (atcs->functions.pcie_perf_req && atcs->functions.pcie_dev_rdy) return true; @@ -614,6 +602,18 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade } /** + * amdgpu_acpi_is_power_shift_control_supported + * + * Check if the ATCS power shift control method + * is supported. + * returns true if supported, false if not. 
+ */ +bool amdgpu_acpi_is_power_shift_control_supported(void) +{ + return amdgpu_acpi_priv.atcs.functions.power_shift_control; +} + +/** * amdgpu_acpi_pcie_notify_device_ready * * @adev: amdgpu_device pointer @@ -624,19 +624,13 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade */ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev) { - acpi_handle handle; union acpi_object *info; - struct amdgpu_atcs *atcs = &adev->atcs; - - /* Get the device handle */ - handle = ACPI_HANDLE(&adev->pdev->dev); - if (!handle) - return -EINVAL; + struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs; if (!atcs->functions.pcie_dev_rdy) return -EINVAL; - info = amdgpu_atcs_call(handle, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL); + info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL); if (!info) return -EIO; @@ -659,9 +653,8 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev) int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise) { - acpi_handle handle; union acpi_object *info; - struct amdgpu_atcs *atcs = &adev->atcs; + struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs; struct atcs_pref_req_input atcs_input; struct atcs_pref_req_output atcs_output; struct acpi_buffer params; @@ -671,11 +664,6 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, if (amdgpu_acpi_pcie_notify_device_ready(adev)) return -EINVAL; - /* Get the device handle */ - handle = ACPI_HANDLE(&adev->pdev->dev); - if (!handle) - return -EINVAL; - if (!atcs->functions.pcie_perf_req) return -EINVAL; @@ -693,7 +681,7 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, params.pointer = &atcs_input; while (retry--) { - info = amdgpu_atcs_call(handle, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, ¶ms); + info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, ¶ms); if (!info) return -EIO; @@ -727,6 +715,96 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, } /** + * amdgpu_acpi_power_shift_control + * + * @adev: amdgpu_device pointer + * @dev_state: device acpi state + * @drv_state: driver state + * + * Executes the POWER_SHIFT_CONTROL method to + * communicate current dGPU device state and + * driver state to APU/SBIOS. + * returns 0 on success, error on failure. + */ +int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, + u8 dev_state, bool drv_state) +{ + union acpi_object *info; + struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs; + struct atcs_pwr_shift_input atcs_input; + struct acpi_buffer params; + + if (!amdgpu_acpi_is_power_shift_control_supported()) + return -EINVAL; + + atcs_input.size = sizeof(struct atcs_pwr_shift_input); + /* dGPU id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */ + atcs_input.dgpu_id = adev->pdev->devfn | (adev->pdev->bus->number << 8); + atcs_input.dev_acpi_state = dev_state; + atcs_input.drv_state = drv_state; + + params.length = sizeof(struct atcs_pwr_shift_input); + params.pointer = &atcs_input; + + info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, ¶ms); + if (!info) { + DRM_ERROR("ATCS PSC update failed\n"); + return -EIO; + } + + return 0; +} + +/** + * amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS + * + * @dev: drm_device pointer + * @ss_state: current smart shift event + * + * returns 0 on success, + * otherwise return error number. 
+ */ +int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int r; + + if (!amdgpu_device_supports_smart_shift(dev)) + return 0; + + switch (ss_state) { + /* SBIOS trigger “stop”, “enable” and “start” at D0, Driver Operational. + * SBIOS trigger “stop” at D3, Driver Not Operational. + * SBIOS trigger “stop” and “disable” at D0, Driver NOT operational. + */ + case AMDGPU_SS_DRV_LOAD: + r = amdgpu_acpi_power_shift_control(adev, + AMDGPU_ATCS_PSC_DEV_STATE_D0, + AMDGPU_ATCS_PSC_DRV_STATE_OPR); + break; + case AMDGPU_SS_DEV_D0: + r = amdgpu_acpi_power_shift_control(adev, + AMDGPU_ATCS_PSC_DEV_STATE_D0, + AMDGPU_ATCS_PSC_DRV_STATE_OPR); + break; + case AMDGPU_SS_DEV_D3: + r = amdgpu_acpi_power_shift_control(adev, + AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT, + AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR); + break; + case AMDGPU_SS_DRV_UNLOAD: + r = amdgpu_acpi_power_shift_control(adev, + AMDGPU_ATCS_PSC_DEV_STATE_D0, + AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR); + break; + default: + return -EINVAL; + } + + return r; +} + +/** * amdgpu_acpi_event - handle notify events * * @nb: notifier block @@ -769,50 +847,15 @@ static int amdgpu_acpi_event(struct notifier_block *nb, */ int amdgpu_acpi_init(struct amdgpu_device *adev) { - acpi_handle handle, atif_handle; - struct amdgpu_atif *atif; - struct amdgpu_atcs *atcs = &adev->atcs; - int ret; - - /* Get the device handle */ - handle = ACPI_HANDLE(&adev->pdev->dev); - - if (!adev->bios || !handle) - return 0; - - /* Call the ATCS method */ - ret = amdgpu_atcs_verify_interface(handle, atcs); - if (ret) { - DRM_DEBUG_DRIVER("Call to ATCS verify_interface failed: %d\n", ret); - } - - /* Probe for ATIF, and initialize it if found */ - atif_handle = amdgpu_atif_probe_handle(handle); - if (!atif_handle) - goto out; - - atif = kzalloc(sizeof(*atif), GFP_KERNEL); - if (!atif) { - DRM_WARN("Not enough memory to initialize ATIF\n"); - goto out; - } - atif->handle = atif_handle; - - /* Call the ATIF method */ - ret = amdgpu_atif_verify_interface(atif); - if (ret) { - DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret); - kfree(atif); - goto out; - } - adev->atif = atif; + struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if (atif->notifications.brightness_change) { if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; - atif->bd = dm->backlight_dev; + if (dm->backlight_dev[0]) + atif->bd = dm->backlight_dev[0]; #endif } else { struct drm_encoder *tmp; @@ -834,6 +877,129 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } } #endif + adev->acpi_nb.notifier_call = amdgpu_acpi_event; + register_acpi_notifier(&adev->acpi_nb); + + return 0; +} + +void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) +{ + struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; + + caps->caps_valid = atif->backlight_caps.caps_valid; + caps->min_input_signal = atif->backlight_caps.min_input_signal; + caps->max_input_signal = atif->backlight_caps.max_input_signal; +} + +/** + * amdgpu_acpi_fini - tear down driver acpi support + * + * @adev: amdgpu_device pointer + * + * Unregisters with the acpi notifier chain (all asics). 
+ */ +void amdgpu_acpi_fini(struct amdgpu_device *adev) +{ + unregister_acpi_notifier(&adev->acpi_nb); +} + +/** + * amdgpu_atif_pci_probe_handle - look up the ATIF handle + * + * @pdev: pci device + * + * Look up the ATIF handles (all asics). + * Returns true if the handle is found, false if not. + */ +static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev) +{ + char acpi_method_name[255] = { 0 }; + struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name}; + acpi_handle dhandle, atif_handle; + acpi_status status; + int ret; + + dhandle = ACPI_HANDLE(&pdev->dev); + if (!dhandle) + return false; + + status = acpi_get_handle(dhandle, "ATIF", &atif_handle); + if (ACPI_FAILURE(status)) { + return false; + } + amdgpu_acpi_priv.atif.handle = atif_handle; + acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer); + DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name); + ret = amdgpu_atif_verify_interface(&amdgpu_acpi_priv.atif); + if (ret) { + amdgpu_acpi_priv.atif.handle = 0; + return false; + } + return true; +} + +/** + * amdgpu_atcs_pci_probe_handle - look up the ATCS handle + * + * @pdev: pci device + * + * Look up the ATCS handles (all asics). + * Returns true if the handle is found, false if not. + */ +static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) +{ + char acpi_method_name[255] = { 0 }; + struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name }; + acpi_handle dhandle, atcs_handle; + acpi_status status; + int ret; + + dhandle = ACPI_HANDLE(&pdev->dev); + if (!dhandle) + return false; + + status = acpi_get_handle(dhandle, "ATCS", &atcs_handle); + if (ACPI_FAILURE(status)) { + return false; + } + amdgpu_acpi_priv.atcs.handle = atcs_handle; + acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer); + DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name); + ret = amdgpu_atcs_verify_interface(&amdgpu_acpi_priv.atcs); + if (ret) { + amdgpu_acpi_priv.atcs.handle = 0; + return false; + } + return true; +} + +/* + * amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods + * + * Check if we have the ATIF/ATCS methods and populate + * the structures in the driver. 
+ */ +void amdgpu_acpi_detect(void) +{ + struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; + struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs; + struct pci_dev *pdev = NULL; + int ret; + + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + if (!atif->handle) + amdgpu_atif_pci_probe_handle(pdev); + if (!atcs->handle) + amdgpu_atcs_pci_probe_handle(pdev); + } + + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { + if (!atif->handle) + amdgpu_atif_pci_probe_handle(pdev); + if (!atcs->handle) + amdgpu_atcs_pci_probe_handle(pdev); + } if (atif->functions.sbios_requests && !atif->functions.system_params) { /* XXX check this workraround, if sbios request function is @@ -863,50 +1029,21 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } else { atif->backlight_caps.caps_valid = false; } - -out: - adev->acpi_nb.notifier_call = amdgpu_acpi_event; - register_acpi_notifier(&adev->acpi_nb); - - return ret; -} - -void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, - struct amdgpu_dm_backlight_caps *caps) -{ - if (!adev->atif) { - caps->caps_valid = false; - return; - } - caps->caps_valid = adev->atif->backlight_caps.caps_valid; - caps->min_input_signal = adev->atif->backlight_caps.min_input_signal; - caps->max_input_signal = adev->atif->backlight_caps.max_input_signal; } /** - * amdgpu_acpi_fini - tear down driver acpi support + * amdgpu_acpi_is_s0ix_active * - * @adev: amdgpu_device pointer - * - * Unregisters with the acpi notifier chain (all asics). - */ -void amdgpu_acpi_fini(struct amdgpu_device *adev) -{ - unregister_acpi_notifier(&adev->acpi_nb); - kfree(adev->atif); -} - -/** - * amdgpu_acpi_is_s0ix_supported + * @adev: amdgpu_device_pointer * * returns true if supported, false if not. */ -bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) +bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { -#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) +#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) - return true; + return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; } #endif return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5f6696a3c778..3003ee1c9487 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -21,6 +21,7 @@ */ #include "amdgpu_amdkfd.h" +#include "amd_pcie.h" #include "amd_shared.h" #include "amdgpu.h" @@ -170,7 +171,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) } } -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev) { if (adev->kfd.dev) { kgd2kfd_device_exit(adev->kfd.dev); @@ -553,6 +554,88 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s return (uint8_t)ret; } +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev; + int num_links; + + if (adev->asic_type != CHIP_ALDEBARAN) + return 0; + + if (src) + peer_adev = (struct amdgpu_device *)src; + + /* num links returns 0 for indirect peers since indirect route is unknown. */ + num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); + if (num_links < 0) { + DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. 
ret = %d\n", + adev->gmc.xgmi.physical_node_id, + peer_adev->gmc.xgmi.physical_node_id, num_links); + num_links = 0; + } + + /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */ + return (num_links * 16 * 25000)/BITS_PER_BYTE; +} + +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dev; + int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : + fls(adev->pm.pcie_mlw_mask)) - 1; + int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask & + CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) : + fls(adev->pm.pcie_gen_mask & + CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1; + uint32_t num_lanes_mask = 1 << num_lanes_shift; + uint32_t gen_speed_mask = 1 << gen_speed_shift; + int num_lanes_factor = 0, gen_speed_mbits_factor = 0; + + switch (num_lanes_mask) { + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1: + num_lanes_factor = 1; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2: + num_lanes_factor = 2; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4: + num_lanes_factor = 4; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8: + num_lanes_factor = 8; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12: + num_lanes_factor = 12; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16: + num_lanes_factor = 16; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32: + num_lanes_factor = 32; + break; + } + + switch (gen_speed_mask) { + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1: + gen_speed_mbits_factor = 2500; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2: + gen_speed_mbits_factor = 5000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3: + gen_speed_mbits_factor = 8000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4: + gen_speed_mbits_factor = 16000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5: + gen_speed_mbits_factor = 32000; + break; + } + + return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE; +} + uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -631,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, ret = dma_fence_wait(f, false); err_ib_sched: - dma_fence_put(f); amdgpu_job_free(job); err: return ret; @@ -670,10 +752,10 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) return 0; } -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, + enum TLB_FLUSH_TYPE flush_type) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - const uint32_t flush_type = 0; bool all_hub = false; if (adev->family == AMDGPU_FAMILY_AI) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 5ffb07b02810..ec028cf963f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -36,13 +36,26 @@ extern uint64_t amdgpu_amdkfd_total_mem_size; +enum TLB_FLUSH_TYPE { + TLB_FLUSH_LEGACY = 0, + TLB_FLUSH_LIGHTWEIGHT, + TLB_FLUSH_HEAVYWEIGHT +}; + struct amdgpu_device; -struct kfd_bo_va_list { - struct list_head bo_list; - struct amdgpu_bo_va *bo_va; - void *kgd_dev; +enum kfd_mem_attachment_type { + KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */ + KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */ + KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */ +}; + +struct kfd_mem_attachment { + struct list_head list; + enum kfd_mem_attachment_type type; bool 
is_mapped; + struct amdgpu_bo_va *bo_va; + struct amdgpu_device *adev; uint64_t va; uint64_t pte_flags; }; @@ -50,7 +63,8 @@ struct kfd_bo_va_list { struct kgd_mem { struct mutex lock; struct amdgpu_bo *bo; - struct list_head bo_va_list; + struct dma_buf *dmabuf; + struct list_head attachments; /* protected by amdkfd_process_info.lock */ struct ttm_validate_buffer validate_list; struct ttm_validate_buffer resv_list; @@ -75,6 +89,7 @@ struct amdgpu_amdkfd_fence { struct mm_struct *mm; spinlock_t lock; char timeline_name[TASK_COMM_LEN]; + struct svm_range_bo *svm_bo; }; struct amdgpu_kfd_dev { @@ -127,14 +142,15 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid, + enum TLB_FLUSH_TYPE flush_type); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -148,7 +164,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); + struct mm_struct *mm, + struct svm_range_bo *svm_bo); #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); @@ -209,6 +226,8 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min); +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min); /* Read user wptr from a specified user address space with page fault * disabled. 
The memory must be pinned and mapped to the hardware when @@ -234,22 +253,27 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s }) /* GPUVM API */ +#define drm_priv_to_vm(drm_priv) \ + (&((struct amdgpu_fpriv *) \ + ((struct drm_file *)(drm_priv))->driver_priv)->vm) + int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, - void **vm, void **process_info, + void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); -uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv); +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, - void *vm, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, @@ -260,7 +284,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dmabuf, - uint64_t va, void *vm, + uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, @@ -270,6 +294,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +void amdgpu_amdkfd_reserve_system_mem(uint64_t size); #else static inline void amdgpu_amdkfd_gpuvm_init_mem_limits(void) @@ -307,7 +332,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); #else static inline int kgd2kfd_init(void) { @@ -366,7 +391,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) } static inline -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index a5434b713856..46cd4ee6bafb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -44,4 +44,5 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 9ef9f3ddad48..5a7f680bcb3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -25,6 +25,7 @@ #include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_arcturus.h" #include "sdma0/sdma0_4_2_2_offset.h" #include "sdma0/sdma0_4_2_2_sh_mask.h" #include "sdma1/sdma1_4_2_2_offset.h" @@ -304,5 +305,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 5af464933976..1d0dbff87d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/sched/mm.h> #include "amdgpu_amdkfd.h" +#include "kfd_svm.h" static const struct dma_fence_ops amdkfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); @@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0); */ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm) + struct mm_struct *mm, + struct svm_range_bo *svm_bo) { struct amdgpu_amdkfd_fence *fence; @@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, fence->mm = mm; get_task_comm(fence->timeline_name, current); spin_lock_init(&fence->lock); - + fence->svm_bo = svm_bo; dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, context, atomic_inc_return(&fence_seq)); @@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f) * a KFD BO and schedules a job to move the BO. * If fence is already signaled return true. * If fence is not signaled schedule a evict KFD process work item. + * + * @f: dma_fence */ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) { @@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; - if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) - return true; - + if (!fence->svm_bo) { + if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) + return true; + } else { + if (!svm_range_schedule_evict_svm_bo(fence)) + return true; + } return false; } /** * amdkfd_fence_release - callback that fence can be freed * - * @fence: fence + * @f: dma_fence * * This function is called when the reference count becomes zero. * Drops the mm_struct reference and RCU schedules freeing up the fence. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 62aa1a6f64ed..960acf68150a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -96,8 +96,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ unlock_srbm(kgd); @@ -161,7 +161,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) lock_srbm(kgd, mec, pipe, 0, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); @@ -239,13 +239,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32(reg, mqd_hqd[reg - hqd_base]); + WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -274,27 +274,27 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, lower_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, upper_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, lower_32_bits((uint64_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uint64_t)wptr)); pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1, (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR, REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); release_queue(kgd); @@ -365,7 +365,7 @@ static int kgd_hqd_dump(struct kgd_dev *kgd, if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ break; \ (*dump)[i][0] = (addr) << 2; \ - (*dump)[i++][1] = RREG32(addr); \ + (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); @@ -497,13 +497,13 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t low, high; acquire_queue(kgd, pipe_id, queue_id); - act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + act = 
RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); high = upper_32_bits(queue_address >> 8); - if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && - high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) && + high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } release_queue(kgd); @@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + type = SAVE_WAVES; + break; default: type = DRAIN_PIPE; break; @@ -621,11 +624,11 @@ loop: preempt_enable(); #endif - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { - temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; if (time_after(jiffies, end_jiffies)) { @@ -716,8 +719,8 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, mutex_lock(&adev->grbm_idx_mutex); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); + WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); @@ -726,7 +729,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); mutex_unlock(&adev->grbm_idx_mutex); return 0; @@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } +static void program_trap_handler_settings(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + /* + * Program TBA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + lower_32_bits(tba_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + upper_32_bits(tba_addr >> 8) | + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); + + /* + * Program TMA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + lower_32_bits(tma_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + upper_32_bits(tma_addr >> 8)); + + unlock_srbm(kgd); +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -774,4 +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .program_trap_handler_settings = program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index d39cff4a1fe3..dac0d751d5af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -95,8 +95,8 @@ static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid, lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), 
sh_mem_config); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ unlock_srbm(kgd); @@ -129,7 +129,7 @@ static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id) lock_srbm(kgd, mec, pipe, 0, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); @@ -212,10 +212,10 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, value); } /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ @@ -224,13 +224,13 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32(reg, mqd_hqd[reg - hqd_base]); + WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -259,17 +259,17 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, lower_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, upper_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, lower_32_bits((uint64_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uint64_t)wptr)); pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1, (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } @@ -279,7 +279,7 @@ static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); release_queue(kgd); @@ -350,7 +350,7 @@ static int hqd_dump_v10_3(struct kgd_dev *kgd, if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ break; \ (*dump)[i][0] = (addr) << 2; \ - (*dump)[i++][1] = RREG32(addr); \ + (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); @@ -482,13 +482,13 @@ static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address, uint32_t low, high; 
acquire_queue(kgd, pipe_id, queue_id); - act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); high = upper_32_bits(queue_address >> 8); - if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && - high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) && + high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } release_queue(kgd); @@ -537,16 +537,19 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd, case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + type = SAVE_WAVES; + break; default: type = DRAIN_PIPE; break; } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { - temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; if (time_after(jiffies, end_jiffies)) { @@ -626,7 +629,7 @@ static int wave_control_execute_v10_3(struct kgd_dev *kgd, mutex_lock(&adev->grbm_idx_mutex); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, @@ -636,7 +639,7 @@ static int wave_control_execute_v10_3(struct kgd_dev *kgd, data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); mutex_unlock(&adev->grbm_idx_mutex); return 0; @@ -658,6 +661,33 @@ static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } +static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + /* + * Program TBA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + lower_32_bits(tba_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + upper_32_bits(tba_addr >> 8) | + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); + + /* + * Program TMA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + lower_32_bits(tma_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + upper_32_bits(tma_addr >> 8)); + + unlock_srbm(kgd); +} + #if 0 uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, uint32_t trap_debug_wave_launch_mode, @@ -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .address_watch_get_offset = address_watch_get_offset_v10_3, .get_atc_vmid_pasid_mapping_info = NULL, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, + .program_trap_handler_settings = program_trap_handler_settings_v10_3, #if 0 .enable_debug_trap = enable_debug_trap_v10_3, .disable_debug_trap = disable_debug_trap_v10_3, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index b43e68fc1378..bcc1cbeb8799 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -42,7 +42,8 @@ enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, - RESET_WAVES + RESET_WAVES, + 
SAVE_WAVES }; static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + type = SAVE_WAVES; + break; default: type = DRAIN_PIPE; break; @@ -719,7 +723,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) } /** - * @get_wave_count: Read device registers to get number of waves in flight for + * get_wave_count: Read device registers to get number of waves in flight for * a particular queue. The method also returns the VMID associated with the * queue. * @@ -755,19 +759,19 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, } /** - * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each + * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each * shader engine and aggregates the number of waves that are in flight for the * process whose pasid is provided as a parameter. The process could have ZERO * or more queues running and submitting waves to compute units. * * @kgd: Handle of device from which to get number of waves in flight * @pasid: Identifies the process for which this query call is invoked - * @wave_cnt: Output parameter updated with number of waves in flight that + * @pasid_wave_cnt: Output parameter updated with number of waves in flight that * belong to process with given pasid * @max_waves_per_cu: Output parameter updated with maximum number of waves * possible per Compute Unit * - * @note: It's possible that the device has too many queues (oversubscription) + * Note: It's possible that the device has too many queues (oversubscription) * in which case a VMID could be remapped to a different PASID. This could lead * to an inaccurate wave count.
Following is a high-level sequence: * Time T1: vmid = getVmid(); vmid is associated with Pasid P1 @@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, adev->gfx.cu_info.max_waves_per_simd; } +void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + /* + * Program TBA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + lower_32_bits(tba_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + upper_32_bits(tba_addr >> 8)); + + /* + * Program TMA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + lower_32_bits(tma_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + upper_32_bits(tma_addr >> 8)); + + unlock_srbm(kgd); +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index e64deba8900f..c63591106879 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -65,3 +65,5 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu); +void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d4118c8128a..2d6b2d77b738 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -33,9 +33,6 @@ #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_xgmi.h" -/* BO flag to indicate a KFD userptr BO */ -#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) - /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ @@ -50,12 +47,6 @@ static struct { spinlock_t mem_limit_lock; } kfd_mem_limit; -/* Struct used for amdgpu_amdkfd_bo_validate */ -struct amdgpu_vm_parser { - uint32_t domain; - bool wait; -}; - static const char * const domain_bit_to_string[] = { "CPU", "GTT", @@ -75,16 +66,16 @@ static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) return (struct amdgpu_device *)kgd; } -static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, +static bool kfd_mem_is_attached(struct amdgpu_vm *avm, struct kgd_mem *mem) { - struct kfd_bo_va_list *entry; + struct kfd_mem_attachment *entry; - list_for_each_entry(entry, &mem->bo_va_list, bo_list) + list_for_each_entry(entry, &mem->attachments, list) if (entry->bo_va->base.vm == avm) - return false; + return true; - return true; + return false; } /* Set memory usage limits. 
Currently, limits are @@ -108,6 +99,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } +void amdgpu_amdkfd_reserve_system_mem(uint64_t size) +{ + kfd_mem_limit.system_mem_used += size; +} + /* Estimate page table size needed to represent a given memory size * * With 4KB pages, we need one 8 byte PTE for each 4KB of memory @@ -217,7 +213,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) u32 domain = bo->preferred_domains; bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); - if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) { domain = AMDGPU_GEM_DOMAIN_CPU; sg = false; } @@ -245,7 +241,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, if (!ef) return -EINVAL; - old = dma_resv_get_list(resv); + old = dma_resv_shared_list(resv); if (!old) return 0; @@ -276,7 +272,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, write_seqcount_end(&resv->seq); /* Drop the references to the removed fences or move them to ef_list */ - for (i = j, k = 0; i < old->shared_count; ++i) { + for (i = j; i < old->shared_count; ++i) { struct dma_fence *f; f = rcu_dereference_protected(new->shared[i], @@ -346,11 +342,9 @@ validate_fail: return ret; } -static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) +static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) { - struct amdgpu_vm_parser *p = param; - - return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); + return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); } /* vm_validate_pt_pd_bos - Validate page table and directory BOs @@ -362,28 +356,23 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) */ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) { - struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_bo *pd = vm->root.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); - struct amdgpu_vm_parser param; int ret; - param.domain = AMDGPU_GEM_DOMAIN_VRAM; - param.wait = false; - - ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, - &param); + ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL); if (ret) { pr_err("failed to validate PT BOs\n"); return ret; } - ret = amdgpu_amdkfd_validate(&param, pd); + ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd); if (ret) { pr_err("failed to validate PD\n"); return ret; } - vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); + vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo); if (vm->use_cpu_for_update) { ret = amdgpu_bo_kmap(pd, NULL); @@ -398,7 +387,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) { - struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_bo *pd = vm->root.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; @@ -431,7 +420,8 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; else - mapping_flags |= AMDGPU_VM_MTYPE_UC; + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } else { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; @@ -450,7 +440,8 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) if (adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= AMDGPU_VM_MTYPE_UC; + mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } @@ -471,87 +462,320 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) return pte_flags; } -/* add_bo_to_vm - Add a BO to a VM +static int +kfd_mem_dmamap_userptr(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + enum dma_data_direction direction = + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *src_ttm = mem->bo->tbo.ttm; + struct ttm_tt *ttm = bo->tbo.ttm; + int ret; + + ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL); + if (unlikely(!ttm->sg)) + return -ENOMEM; + + if (WARN_ON(ttm->num_pages != src_ttm->num_pages)) + return -EINVAL; + + /* Same sequence as in amdgpu_ttm_tt_pin_userptr */ + ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages, + ttm->num_pages, 0, + (u64)ttm->num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (unlikely(ret)) + goto free_sg; + + ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0); + if (unlikely(ret)) + goto release_sg; + + drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address, + ttm->num_pages); + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto unmap_sg; + + return 0; + +unmap_sg: + dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); +release_sg: + pr_err("DMA map userptr failed: %d\n", ret); + sg_free_table(ttm->sg); +free_sg: + kfree(ttm->sg); + ttm->sg = NULL; + return ret; +} + +static int +kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + +static int +kfd_mem_dmamap_attachment(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + switch (attachment->type) { + case KFD_MEM_ATT_SHARED: + return 0; + case KFD_MEM_ATT_USERPTR: + return kfd_mem_dmamap_userptr(mem, attachment); + case KFD_MEM_ATT_DMABUF: + return kfd_mem_dmamap_dmabuf(attachment); + default: + WARN_ON_ONCE(1); + } + return -EINVAL; +} + +static void +kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + enum dma_data_direction direction = + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + struct ttm_operation_ctx ctx = {.interruptible = false}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *ttm = bo->tbo.ttm; + + if (unlikely(!ttm->sg)) + return; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + + dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); + sg_free_table(ttm->sg); + ttm->sg = NULL; +} + +static void +kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + +static void +kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + switch (attachment->type) { + case KFD_MEM_ATT_SHARED: + break; + case KFD_MEM_ATT_USERPTR: + kfd_mem_dmaunmap_userptr(mem, attachment); + break; + case KFD_MEM_ATT_DMABUF: + kfd_mem_dmaunmap_dmabuf(attachment); + break; + default: + WARN_ON_ONCE(1); + } +} + +static int +kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_bo **bo) +{ + unsigned long bo_size = mem->bo->tbo.base.size; + struct drm_gem_object *gobj; + int ret; + + ret = amdgpu_bo_reserve(mem->bo, false); + if (ret) + return ret; + + ret = amdgpu_gem_object_create(adev, bo_size, 1, + AMDGPU_GEM_DOMAIN_CPU, + AMDGPU_GEM_CREATE_PREEMPTIBLE, + ttm_bo_type_sg, mem->bo->tbo.base.resv, + &gobj); + amdgpu_bo_unreserve(mem->bo); + if (ret) + return ret; + + *bo = gem_to_amdgpu_bo(gobj); + (*bo)->parent = amdgpu_bo_ref(mem->bo); + + return 0; +} + +static int +kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_bo **bo) +{ + struct drm_gem_object *gobj; + int ret; + + if (!mem->dmabuf) { + mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base, + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + DRM_RDWR : 0); + if (IS_ERR(mem->dmabuf)) { + ret = PTR_ERR(mem->dmabuf); + mem->dmabuf = NULL; + return ret; + } + } + + gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf); + if (IS_ERR(gobj)) + return PTR_ERR(gobj); + + /* Import takes an extra reference on the dmabuf. Drop it now to + * avoid leaking it. We only need the one reference in + * kgd_mem->dmabuf. + */ + dma_buf_put(mem->dmabuf); + + *bo = gem_to_amdgpu_bo(gobj); + (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE; + (*bo)->parent = amdgpu_bo_ref(mem->bo); + + return 0; +} + +/* kfd_mem_attach - Add a BO to a VM * * Everything that needs to be done only once when a BO is first added * to a VM. It can later be mapped and unmapped many times without * repeating these steps. * + * 0. Create BO for DMA mapping, if needed * 1. Allocate and initialize BO VA entry data structure * 2. Add BO to the VM * 3. Determine ASIC-specific PTE flags * 4. Alloc page tables and directories if needed * 4a. 
Validate new page tables and directories */ -static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, - struct amdgpu_vm *vm, bool is_aql, - struct kfd_bo_va_list **p_bo_va_entry) +static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_vm *vm, bool is_aql) { - int ret; - struct kfd_bo_va_list *bo_va_entry; - struct amdgpu_bo *bo = mem->bo; + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + unsigned long bo_size = mem->bo->tbo.base.size; uint64_t va = mem->va; - struct list_head *list_bo_va = &mem->bo_va_list; - unsigned long bo_size = bo->tbo.base.size; + struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; + struct amdgpu_bo *bo[2] = {NULL, NULL}; + int i, ret; if (!va) { pr_err("Invalid VA when adding BO to VM\n"); return -EINVAL; } - if (is_aql) - va += bo_size; - - bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); - if (!bo_va_entry) - return -ENOMEM; + for (i = 0; i <= is_aql; i++) { + attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL); + if (unlikely(!attachment[i])) { + ret = -ENOMEM; + goto unwind; + } - pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, - va + bo_size, vm); + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, + va + bo_size, vm); - /* Add BO to VM internal data structures*/ - bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo); - if (!bo_va_entry->bo_va) { - ret = -EINVAL; - pr_err("Failed to add BO object to VM. ret == %d\n", - ret); - goto err_vmadd; - } + if (adev == bo_adev || (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && + amdgpu_xgmi_same_hive(adev, bo_adev))) { + /* Mappings on the local GPU and VRAM mappings in the + * local hive share the original BO + */ + attachment[i]->type = KFD_MEM_ATT_SHARED; + bo[i] = mem->bo; + drm_gem_object_get(&bo[i]->tbo.base); + } else if (i > 0) { + /* Multiple mappings on the same GPU share the BO */ + attachment[i]->type = KFD_MEM_ATT_SHARED; + bo[i] = bo[0]; + drm_gem_object_get(&bo[i]->tbo.base); + } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { + /* Create an SG BO to DMA-map userptrs on other GPUs */ + attachment[i]->type = KFD_MEM_ATT_USERPTR; + ret = kfd_mem_attach_userptr(adev, mem, &bo[i]); + if (ret) + goto unwind; + } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT && + mem->bo->tbo.type != ttm_bo_type_sg) { + /* GTT BOs use DMA-mapping ability of dynamic-attach + * DMA bufs. TODO: The same should work for VRAM on + * large-BAR GPUs. + */ + attachment[i]->type = KFD_MEM_ATT_DMABUF; + ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); + if (ret) + goto unwind; + } else { + /* FIXME: Need to DMA-map other BO types: + * large-BAR VRAM, doorbells, MMIO remap + */ + attachment[i]->type = KFD_MEM_ATT_SHARED; + bo[i] = mem->bo; + drm_gem_object_get(&bo[i]->tbo.base); + } - bo_va_entry->va = va; - bo_va_entry->pte_flags = get_pte_flags(adev, mem); - bo_va_entry->kgd_dev = (void *)adev; - list_add(&bo_va_entry->bo_list, list_bo_va); + /* Add BO to VM internal data structures */ + attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + if (unlikely(!attachment[i]->bo_va)) { + ret = -ENOMEM; + pr_err("Failed to add BO object to VM. 
ret == %d\n", + ret); + goto unwind; + } - if (p_bo_va_entry) - *p_bo_va_entry = bo_va_entry; + attachment[i]->va = va; + attachment[i]->pte_flags = get_pte_flags(adev, mem); + attachment[i]->adev = adev; + list_add(&attachment[i]->list, &mem->attachments); - /* Allocate validate page tables if needed */ - ret = vm_validate_pt_pd_bos(vm); - if (ret) { - pr_err("validate_pt_pd_bos() failed\n"); - goto err_alloc_pts; + va += bo_size; } return 0; -err_alloc_pts: - amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); - list_del(&bo_va_entry->bo_list); -err_vmadd: - kfree(bo_va_entry); +unwind: + for (; i >= 0; i--) { + if (!attachment[i]) + continue; + if (attachment[i]->bo_va) { + amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va); + list_del(&attachment[i]->list); + } + if (bo[i]) + drm_gem_object_put(&bo[i]->tbo.base); + kfree(attachment[i]); + } return ret; } -static void remove_bo_from_vm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, unsigned long size) +static void kfd_mem_detach(struct kfd_mem_attachment *attachment) { - pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", - entry->va, - entry->va + size, entry); - amdgpu_vm_bo_rmv(adev, entry->bo_va); - list_del(&entry->bo_list); - kfree(entry); + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + + pr_debug("\t remove VA 0x%llx in entry %p\n", + attachment->va, attachment); + amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va); + drm_gem_object_put(&bo->tbo.base); + list_del(&attachment->list); + kfree(attachment); } static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, @@ -726,7 +950,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, struct bo_vm_reservation_context *ctx) { struct amdgpu_bo *bo = mem->bo; - struct kfd_bo_va_list *entry; + struct kfd_mem_attachment *entry; unsigned int i; int ret; @@ -738,7 +962,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, INIT_LIST_HEAD(&ctx->list); INIT_LIST_HEAD(&ctx->duplicates); - list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + list_for_each_entry(entry, &mem->attachments, list) { if ((vm && vm != entry->bo_va->base.vm) || (entry->is_mapped != map_type && map_type != BO_VM_ALL)) @@ -760,7 +984,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; - list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + list_for_each_entry(entry, &mem->attachments, list) { if ((vm && vm != entry->bo_va->base.vm) || (entry->is_mapped != map_type && map_type != BO_VM_ALL)) @@ -814,11 +1038,12 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, return ret; } -static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, +static void unmap_bo_from_gpuvm(struct kgd_mem *mem, + struct kfd_mem_attachment *entry, struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = entry->bo_va; + struct amdgpu_device *adev = entry->adev; struct amdgpu_vm *vm = bo_va->base.vm; amdgpu_vm_bo_unmap(adev, bo_va, entry->va); @@ -827,18 +1052,24 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, amdgpu_sync_fence(sync, bo_va->last_pt_update); - return 0; + kfd_mem_dmaunmap_attachment(mem, entry); } -static int update_gpuvm_pte(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, - struct amdgpu_sync *sync) +static int update_gpuvm_pte(struct kgd_mem *mem, + struct kfd_mem_attachment *entry, + struct amdgpu_sync *sync, + bool *table_freed) { - int ret; struct amdgpu_bo_va *bo_va = entry->bo_va; + struct amdgpu_device *adev = entry->adev; + int ret; + + ret = 
kfd_mem_dmamap_attachment(mem, entry); + if (ret) + return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false); + ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -847,14 +1078,16 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return amdgpu_sync_fence(sync, bo_va->last_pt_update); } -static int map_bo_to_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, - bool no_update_pte) +static int map_bo_to_gpuvm(struct kgd_mem *mem, + struct kfd_mem_attachment *entry, + struct amdgpu_sync *sync, + bool no_update_pte, + bool *table_freed) { int ret; /* Set virtual address for the allocation */ - ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, + ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0, amdgpu_bo_size(entry->bo_va->base.bo), entry->pte_flags); if (ret) { @@ -866,7 +1099,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, if (no_update_pte) return 0; - ret = update_gpuvm_pte(adev, entry, sync); + ret = update_gpuvm_pte(mem, entry, sync, table_freed); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -875,7 +1108,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return 0; update_gpuvm_pte_failed: - unmap_bo_from_gpuvm(adev, entry, sync); + unmap_bo_from_gpuvm(mem, entry, sync); return ret; } @@ -920,7 +1153,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info, list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { - struct amdgpu_bo *pd = peer_vm->root.base.bo; + struct amdgpu_bo *pd = peer_vm->root.bo; ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv, AMDGPU_SYNC_NE_OWNER, @@ -967,7 +1200,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), - current->mm); + current->mm, + NULL); if (!info->eviction_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; @@ -986,7 +1220,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, vm->process_info = *process_info; /* Validate page directory and attach eviction fence */ - ret = amdgpu_bo_reserve(vm->root.base.bo, true); + ret = amdgpu_bo_reserve(vm->root.bo, true); if (ret) goto reserve_pd_fail; ret = vm_validate_pt_pd_bos(vm); @@ -994,16 +1228,16 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail; } - ret = amdgpu_bo_sync_wait(vm->root.base.bo, + ret = amdgpu_bo_sync_wait(vm->root.bo, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; - ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1); + ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; - amdgpu_bo_fence(vm->root.base.bo, + amdgpu_bo_fence(vm->root.bo, &vm->process_info->eviction_fence->base, true); - amdgpu_bo_unreserve(vm->root.base.bo); + amdgpu_bo_unreserve(vm->root.bo); /* Update process info */ mutex_lock(&vm->process_info->lock); @@ -1017,7 +1251,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, reserve_shared_fail: wait_pd_fail: validate_pd_fail: - amdgpu_bo_unreserve(vm->root.base.bo); + amdgpu_bo_unreserve(vm->root.bo); reserve_pd_fail: vm->process_info = NULL; if (info) { @@ -1036,30 +1270,45 @@ create_evict_fence_fail: int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, - 
void **vm, void **process_info, + void **process_info, struct dma_fence **ef) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct drm_file *drm_priv = filp->private_data; - struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; - struct amdgpu_vm *avm = &drv_priv->vm; + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; int ret; + ret = amdgpu_file_to_fpriv(filp, &drv_priv); + if (ret) + return ret; + avm = &drv_priv->vm; + /* Already a compute VM? */ if (avm->process_info) return -EINVAL; + /* Free the original amdgpu allocated pasid, + * will be replaced with kfd allocated pasid. + */ + if (avm->pasid) { + amdgpu_pasid_free(avm->pasid); + amdgpu_vm_set_pasid(adev, avm, 0); + } + /* Convert VM into a compute VM */ - ret = amdgpu_vm_make_compute(adev, avm, pasid); + ret = amdgpu_vm_make_compute(adev, avm); if (ret) return ret; + ret = amdgpu_vm_set_pasid(adev, avm, pasid); + if (ret) + return ret; /* Initialize KFD part of the VM and process info */ ret = init_kfd_vm(avm, process_info, ef); if (ret) return ret; - *vm = (void *)avm; + amdgpu_vm_set_task_info(avm); return 0; } @@ -1068,7 +1317,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_bo *pd = vm->root.bo; if (!process_info) return; @@ -1100,15 +1349,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, } } -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm; - if (WARN_ON(!kgd || !vm)) + if (WARN_ON(!kgd || !drm_priv)) return; - pr_debug("Releasing process vm %p\n", vm); + avm = drm_priv_to_vm(drm_priv); + + pr_debug("Releasing process vm %p\n", avm); /* The original pasid of amdgpu vm has already been * released during making a amdgpu vm to a compute vm @@ -1119,10 +1370,10 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) amdgpu_vm_release_compute(adev, avm); } -uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) { - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdgpu_bo *pd = avm->root.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); if (adev->asic_type < CHIP_VEGA10) @@ -1132,11 +1383,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, - void *vm, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; uint64_t user_addr = 0; @@ -1153,15 +1404,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? 
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : - AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - alloc_flags = 0; + alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE; if (!offset || !*offset) return -EINVAL; user_addr = untagged_addr(*offset); @@ -1185,7 +1435,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ret = -ENOMEM; goto err; } - INIT_LIST_HEAD(&(*mem)->bo_va_list); + INIT_LIST_HEAD(&(*mem)->attachments); mutex_init(&(*mem)->lock); (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); @@ -1216,6 +1466,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + ret = drm_vma_node_allow(&gobj->vma_node, drm_priv); + if (ret) { + pr_debug("Failed to allow vma node access. ret %d\n", ret); + goto err_node_allow; + } bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; @@ -1224,7 +1479,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) - bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; + bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO; (*mem)->va = va; (*mem)->domain = domain; @@ -1245,6 +1500,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&gobj->vma_node, drm_priv); +err_node_allow: amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; @@ -1262,11 +1519,12 @@ err: } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = mem->bo->tbo.base.size; - struct kfd_bo_va_list *entry, *tmp; + struct kfd_mem_attachment *entry, *tmp; struct bo_vm_reservation_context ctx; struct ttm_validate_buffer *bo_list_entry; unsigned int mapped_to_gpu_memory; @@ -1309,13 +1567,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue)); - /* Remove from VM internal data structures */ - list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) - remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, - entry, bo_size); - ret = unreserve_bo_and_vms(&ctx, false, false); + /* Remove from VM internal data structures */ + list_for_each_entry_safe(entry, tmp, &mem->attachments, list) + kfd_mem_detach(entry); + /* Free the sync object */ amdgpu_sync_free(&mem->sync); @@ -1339,6 +1596,9 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Free the BO*/ + drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); + if (mem->dmabuf) + dma_buf_put(mem->dmabuf); drm_gem_object_put(&mem->bo->tbo.base); mutex_destroy(&mem->lock); kfree(mem); @@ -1347,17 +1607,16 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + struct kgd_dev *kgd, struct kgd_mem *mem, + void *drm_priv, bool *table_freed) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; struct amdgpu_bo *bo; uint32_t domain; - 
struct kfd_bo_va_list *entry; + struct kfd_mem_attachment *entry; struct bo_vm_reservation_context ctx; - struct kfd_bo_va_list *bo_va_entry = NULL; - struct kfd_bo_va_list *bo_va_entry_aql = NULL; unsigned long bo_size; bool is_invalid_userptr = false; @@ -1391,9 +1650,15 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue), - vm, domain_string(domain)); + avm, domain_string(domain)); - ret = reserve_bo_and_vm(mem, vm, &ctx); + if (!kfd_mem_is_attached(avm, mem)) { + ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue); + if (ret) + goto out; + } + + ret = reserve_bo_and_vm(mem, avm, &ctx); if (unlikely(ret)) goto out; @@ -1403,25 +1668,12 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( * the next restore worker */ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && - bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + bo->tbo.resource->mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; - if (check_if_add_bo_to_vm(avm, mem)) { - ret = add_bo_to_vm(adev, mem, avm, false, - &bo_va_entry); - if (ret) - goto add_bo_to_vm_failed; - if (mem->aql_queue) { - ret = add_bo_to_vm(adev, mem, avm, - true, &bo_va_entry_aql); - if (ret) - goto add_bo_to_vm_failed_aql; - } - } else { - ret = vm_validate_pt_pd_bos(avm); - if (unlikely(ret)) - goto add_bo_to_vm_failed; - } + ret = vm_validate_pt_pd_bos(avm); + if (unlikely(ret)) + goto out_unreserve; if (mem->mapped_to_gpu_memory == 0 && !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { @@ -1432,34 +1684,34 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ret = amdgpu_amdkfd_bo_validate(bo, domain, true); if (ret) { pr_debug("Validate failed\n"); - goto map_bo_to_gpuvm_failed; + goto out_unreserve; } } - list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->base.vm == vm && !entry->is_mapped) { - pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", - entry->va, entry->va + bo_size, - entry); + list_for_each_entry(entry, &mem->attachments, list) { + if (entry->bo_va->base.vm != avm || entry->is_mapped) + continue; - ret = map_bo_to_gpuvm(adev, entry, ctx.sync, - is_invalid_userptr); - if (ret) { - pr_err("Failed to map bo to gpuvm\n"); - goto map_bo_to_gpuvm_failed; - } + pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", + entry->va, entry->va + bo_size, entry); - ret = vm_update_pds(vm, ctx.sync); - if (ret) { - pr_err("Failed to update page directories\n"); - goto map_bo_to_gpuvm_failed; - } + ret = map_bo_to_gpuvm(mem, entry, ctx.sync, + is_invalid_userptr, table_freed); + if (ret) { + pr_err("Failed to map bo to gpuvm\n"); + goto out_unreserve; + } - entry->is_mapped = true; - mem->mapped_to_gpu_memory++; - pr_debug("\t INC mapping count %d\n", - mem->mapped_to_gpu_memory); + ret = vm_update_pds(avm, ctx.sync); + if (ret) { + pr_err("Failed to update page directories\n"); + goto out_unreserve; } + + entry->is_mapped = true; + mem->mapped_to_gpu_memory++; + pr_debug("\t INC mapping count %d\n", + mem->mapped_to_gpu_memory); } if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) @@ -1468,15 +1720,15 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( true); ret = unreserve_bo_and_vms(&ctx, false, false); + /* Only apply no TLB flush on Aldebaran to + * workaround regressions on other Asics. 
+ */ + if (table_freed && (adev->asic_type != CHIP_ALDEBARAN)) + *table_freed = true; + goto out; -map_bo_to_gpuvm_failed: - if (bo_va_entry_aql) - remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); -add_bo_to_vm_failed_aql: - if (bo_va_entry) - remove_bo_from_vm(adev, bo_va_entry, bo_size); -add_bo_to_vm_failed: +out_unreserve: unreserve_bo_and_vms(&ctx, false, false); out: mutex_unlock(&mem->process_info->lock); @@ -1485,19 +1737,18 @@ out: } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdkfd_process_info *process_info = - ((struct amdgpu_vm *)vm)->process_info; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; - struct kfd_bo_va_list *entry; + struct kfd_mem_attachment *entry; struct bo_vm_reservation_context ctx; int ret; mutex_lock(&mem->lock); - ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); + ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx); if (unlikely(ret)) goto out; /* If no VMs were reserved, it means the BO wasn't actually mapped */ @@ -1506,35 +1757,28 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( goto unreserve_out; } - ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); + ret = vm_validate_pt_pd_bos(avm); if (unlikely(ret)) goto unreserve_out; pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue), - vm); - - list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->base.vm == vm && entry->is_mapped) { - pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", - entry->va, - entry->va + bo_size, - entry); - - ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); - if (ret == 0) { - entry->is_mapped = false; - } else { - pr_err("failed to unmap VA 0x%llx\n", - mem->va); - goto unreserve_out; - } + avm); - mem->mapped_to_gpu_memory--; - pr_debug("\t DEC mapping count %d\n", - mem->mapped_to_gpu_memory); - } + list_for_each_entry(entry, &mem->attachments, list) { + if (entry->bo_va->base.vm != avm || !entry->is_mapped) + continue; + + pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", + entry->va, entry->va + bo_size, entry); + + unmap_bo_from_gpuvm(mem, entry, ctx.sync); + entry->is_mapped = false; + + mem->mapped_to_gpu_memory--; + pr_debug("\t DEC mapping count %d\n", + mem->mapped_to_gpu_memory); } /* If BO is unmapped from all VMs, unfence it. 
It can be evicted if @@ -1642,14 +1886,15 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dma_buf, - uint64_t va, void *vm, + uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct drm_gem_object *obj; struct amdgpu_bo *bo; - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + int ret; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* Can't handle non-graphics buffers */ @@ -1670,13 +1915,19 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, if (!*mem) return -ENOMEM; + ret = drm_vma_node_allow(&obj->vma_node, drm_priv); + if (ret) { + kfree(mem); + return ret; + } + if (size) *size = amdgpu_bo_size(bo); if (mmap_offset) *mmap_offset = amdgpu_bo_mmap_offset(bo); - INIT_LIST_HEAD(&(*mem)->bo_va_list); + INIT_LIST_HEAD(&(*mem)->attachments); mutex_init(&(*mem)->lock); (*mem)->alloc_flags = @@ -1873,7 +2124,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_inval_list, validate_list.head) { - struct kfd_bo_va_list *bo_va_entry; + struct kfd_mem_attachment *attachment; bo = mem->bo; @@ -1896,13 +2147,12 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) * VM faults if the GPU tries to access the invalid * memory. */ - list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { - if (!bo_va_entry->is_mapped) + list_for_each_entry(attachment, &mem->attachments, list) { + if (!attachment->is_mapped) continue; - ret = update_gpuvm_pte((struct amdgpu_device *) - bo_va_entry->kgd_dev, - bo_va_entry, &sync); + kfd_mem_dmaunmap_attachment(mem, attachment); + ret = update_gpuvm_pte(mem, attachment, &sync, NULL); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2083,7 +2333,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_bo_va_list *bo_va_entry; + struct kfd_mem_attachment *attachment; total_size += amdgpu_bo_size(bo); @@ -2103,12 +2353,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; } - list_for_each_entry(bo_va_entry, &mem->bo_va_list, - bo_list) { - ret = update_gpuvm_pte((struct amdgpu_device *) - bo_va_entry->kgd_dev, - bo_va_entry, - &sync_obj); + list_for_each_entry(attachment, &mem->attachments, list) { + if (!attachment->is_mapped) + continue; + + kfd_mem_dmaunmap_attachment(mem, attachment); + ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); if (ret) { pr_debug("Memory eviction: update PTE failed. 
Try again\n"); goto validate_map_fail; @@ -2135,7 +2385,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) */ new_fence = amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, - process_info->eviction_fence->mm); + process_info->eviction_fence->mm, + NULL); if (!new_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; @@ -2154,7 +2405,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { - struct amdgpu_bo *bo = peer_vm->root.base.bo; + struct amdgpu_bo *bo = peer_vm->root.bo; amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); } @@ -2182,7 +2433,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem return -ENOMEM; mutex_init(&(*mem)->lock); - INIT_LIST_HEAD(&(*mem)->bo_va_list); + INIT_LIST_HEAD(&(*mem)->attachments); (*mem)->bo = amdgpu_bo_ref(gws_bo); (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; (*mem)->process_info = process_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 494b2e1717d5..96b7bb13a2dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1768,6 +1768,15 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, NULL); +static struct attribute *amdgpu_vbios_version_attrs[] = { + &dev_attr_vbios_version.attr, + NULL +}; + +const struct attribute_group amdgpu_vbios_version_attr_group = { + .attrs = amdgpu_vbios_version_attrs +}; + /** * amdgpu_atombios_fini - free the driver info and callbacks for atombios * @@ -1787,7 +1796,6 @@ void amdgpu_atombios_fini(struct amdgpu_device *adev) adev->mode_info.atom_context = NULL; kfree(adev->mode_info.atom_card_info); adev->mode_info.atom_card_info = NULL; - device_remove_file(adev->dev, &dev_attr_vbios_version); } /** @@ -1804,7 +1812,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) { struct card_info *atom_card_info = kzalloc(sizeof(struct card_info), GFP_KERNEL); - int ret; if (!atom_card_info) return -ENOMEM; @@ -1828,17 +1835,14 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) if (adev->is_atom_fw) { amdgpu_atomfirmware_scratch_regs_init(adev); amdgpu_atomfirmware_allocate_fb_scratch(adev); + /* cached firmware_flags for further usage */ + adev->mode_info.firmware_flags = + amdgpu_atomfirmware_query_firmware_capability(adev); } else { amdgpu_atombios_scratch_regs_init(adev); amdgpu_atombios_allocate_fb_scratch(adev); } - ret = device_create_file(adev->dev, &dev_attr_vbios_version); - if (ret) { - DRM_ERROR("Failed to create device file for VBIOS version\n"); - return ret; - } - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 60716b35444b..97178b307ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -29,23 +29,59 @@ #include "atombios.h" #include "soc15_hw_ip.h" -bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) +union firmware_info { + struct atom_firmware_info_v3_1 v31; + struct atom_firmware_info_v3_2 v32; + struct atom_firmware_info_v3_3 v33; + struct atom_firmware_info_v3_4 v34; +}; + +/* + * Helper function to query firmware capability + * + * @adev: amdgpu_device 
pointer + * + * Return firmware_capability in firmwareinfo table on success or 0 if not + */ +uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev) { - int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - firmwareinfo); - uint16_t data_offset; + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union firmware_info *firmware_info; + u8 frev, crev; + u32 fw_cap = 0; - if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, - NULL, NULL, &data_offset)) { - struct atom_firmware_info_v3_1 *firmware_info = - (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + - data_offset); + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); - if (le32_to_cpu(firmware_info->firmware_capability) & - ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION) - return true; + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, + index, &size, &frev, &crev, &data_offset)) { + /* support firmware_info 3.1 + */ + if ((frev == 3 && crev >=1) || (frev > 3)) { + firmware_info = (union firmware_info *) + (mode_info->atom_context->bios + data_offset); + fw_cap = le32_to_cpu(firmware_info->v31.firmware_capability); + } } - return false; + + return fw_cap; +} + +/* + * Helper function to query gpu virtualization capability + * + * @adev: amdgpu_device pointer + * + * Return true if gpu virtualization is supported or false if not + */ +bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev) +{ + u32 fw_cap; + + fw_cap = adev->mode_info.firmware_flags; + + return (fw_cap & ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION) ? true : false; } void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev) @@ -400,43 +436,94 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) return ecc_default_enabled; } -union firmware_info { - struct atom_firmware_info_v3_1 v31; - struct atom_firmware_info_v3_2 v32; - struct atom_firmware_info_v3_3 v33; - struct atom_firmware_info_v3_4 v34; -}; - /* + * Helper function to query sram ecc capability + * + * @adev: amdgpu_device pointer + * * Return true if vbios supports sram ecc or false if not */ bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev) { + u32 fw_cap; + + fw_cap = adev->mode_info.firmware_flags; + + return (fw_cap & ATOM_FIRMWARE_CAP_SRAM_ECC) ? true : false; +} + +/* + * Helper function to query dynamic boot config capability + * + * @adev: amdgpu_device pointer + * + * Return true if vbios supports dynamic boot config or false if not + */ +bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev) +{ + u32 fw_cap; + + fw_cap = adev->mode_info.firmware_flags; + + return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false; +} + +/** + * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS + * adev: amdgpu_device pointer + * i2c_address: pointer to u8; if not NULL, will contain + * the RAS EEPROM address if the function returns true + * + * Return true if VBIOS supports RAS EEPROM address reporting, + * else return false. If true and @i2c_address is not NULL, + * will contain the RAS ROM address. 
+ */ +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, + u8 *i2c_address) +{ struct amdgpu_mode_info *mode_info = &adev->mode_info; int index; u16 data_offset, size; union firmware_info *firmware_info; u8 frev, crev; - bool sram_ecc_supported = false; index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - firmwareinfo); + firmwareinfo); if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, - index, &size, &frev, &crev, &data_offset)) { - /* support firmware_info 3.1 + */ - if ((frev == 3 && crev >=1) || (frev > 3)) { + index, &size, &frev, &crev, + &data_offset)) { + /* support firmware_info 3.4 + */ + if ((frev == 3 && crev >=4) || (frev > 3)) { firmware_info = (union firmware_info *) (mode_info->atom_context->bios + data_offset); - sram_ecc_supported = - (le32_to_cpu(firmware_info->v31.firmware_capability) & - ATOM_FIRMWARE_CAP_SRAM_ECC) ? true : false; + /* The ras_rom_i2c_slave_addr should ideally + * be a 19-bit EEPROM address, which would be + * used as is by the driver; see top of + * amdgpu_eeprom.c. + * + * When this is the case, 0 is of course a + * valid RAS EEPROM address, in which case, + * we'll drop the first "if (firm...)" and only + * leave the check for the pointer. + * + * The reason this works right now is because + * ras_rom_i2c_slave_addr contains the EEPROM + * device type qualifier 1010b in the top 4 + * bits. + */ + if (firmware_info->v34.ras_rom_i2c_slave_addr) { + if (i2c_address) + *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr; + return true; + } } } - return sram_ecc_supported; + return false; } + union smu_info { struct atom_smu_info_v3_1 v31; }; @@ -466,10 +553,6 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) adev->pm.current_sclk = adev->clock.default_sclk; adev->pm.current_mclk = adev->clock.default_mclk; - /* not technically a clock, but... 
*/ - adev->mode_info.firmware_flags = - le32_to_cpu(firmware_info->v31.firmware_capability); - ret = 0; } @@ -519,6 +602,21 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) ret = 0; } + /* if asic is Navi+, the rlc reference clock is used for system clock + * from vbios gfx_info table */ + if (adev->asic_type >= CHIP_NAVI10) { + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + gfx_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + struct atom_gfx_info_v2_2 *gfx_info = (struct atom_gfx_info_v2_2*) + (mode_info->atom_context->bios + data_offset); + if ((frev == 2) && (crev >= 2)) + spll->reference_freq = le32_to_cpu(gfx_info->rlc_gpu_timer_refclk); + ret = 0; + } + } + return ret; } @@ -584,67 +682,19 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) } /* - * Check if VBIOS supports GDDR6 training data save/restore + * Helper function to query two stage mem training capability + * + * @adev: amdgpu_device pointer + * + * Return true if two stage mem training is supported or false if not */ -static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev) -{ - uint16_t data_offset; - int index; - - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - firmwareinfo); - if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, - NULL, NULL, &data_offset)) { - struct atom_firmware_info_v3_1 *firmware_info = - (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + - data_offset); - - DRM_DEBUG("atom firmware capability:0x%08x.\n", - le32_to_cpu(firmware_info->firmware_capability)); - - if (le32_to_cpu(firmware_info->firmware_capability) & - ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) - return true; - } - - return false; -} - -int amdgpu_mem_train_support(struct amdgpu_device *adev) +bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev) { - int ret; - uint32_t major, minor, revision, hw_v; - - if (gddr6_mem_train_vbios_support(adev)) { - amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision); - hw_v = HW_REV(major, minor, revision); - /* - * treat 0 revision as a special case since register for MP0 and MMHUB is missing - * for some Navi10 A0, preventing driver from discovering the hwip information since - * none of the functions will be initialized, it should not cause any problems - */ - switch (hw_v) { - case HW_REV(11, 0, 0): - case HW_REV(11, 0, 5): - case HW_REV(11, 0, 7): - case HW_REV(11, 0, 11): - case HW_REV(11, 0, 12): - ret = 1; - break; - default: - DRM_ERROR("memory training vbios supports but psp hw(%08x)" - " doesn't support!\n", hw_v); - ret = -1; - break; - } - } else { - ret = 0; - hw_v = -1; - } + u32 fw_cap; + fw_cap = adev->mode_info.firmware_flags; - DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret); - return ret; + return (fw_cap & ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) ? 
true : false; } int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 9f0d4356e8df..751248b253de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -26,7 +26,8 @@ #define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) -bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); +uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, @@ -35,7 +36,9 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address); +bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); -int amdgpu_mem_train_support(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index a130e766cbdb..c905a4cfc173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -34,6 +34,7 @@ struct amdgpu_fpriv; struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; + struct dma_fence_chain *chain; uint32_t priority; struct page **user_pages; bool user_invalidated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b5c766998045..913f9eaa9cd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -396,10 +396,10 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, spin_unlock(&adev->mm_stats.lock); } -static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, - struct amdgpu_bo *bo) +static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_cs_parser *p = param; struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, @@ -451,21 +451,6 @@ retry: return r; } -static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) -{ - struct amdgpu_cs_parser *p = param; - int r; - - r = amdgpu_cs_bo_validate(p, bo); - if (r) - return r; - - if (bo->shadow) - r = amdgpu_cs_bo_validate(p, bo->shadow); - - return r; -} - static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, struct list_head *validated) { @@ -493,7 +478,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, lobj->user_pages); } - r = amdgpu_cs_validate(p, bo); + r = amdgpu_cs_bo_validate(p, bo); if (r) return r; @@ -587,13 +572,27 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out; } + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct 
amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + e->bo_va = amdgpu_vm_bo_find(vm, bo); + + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) { + e->chain = dma_fence_chain_alloc(); + if (!e->chain) { + r = -ENOMEM; + goto error_validate; + } + } + } + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; p->bytes_moved_vis = 0; r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, - amdgpu_cs_validate, p); + amdgpu_cs_bo_validate, p); if (r) { DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n"); goto error_validate; @@ -614,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - /* Make sure we use the exclusive slot for shared BOs */ - if (bo->prime_shared_count) - e->tv.num_shared = 0; - e->bo_va = amdgpu_vm_bo_find(vm, bo); - } - if (gds) { p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; @@ -644,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) + if (r) { + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + dma_fence_chain_free(e->chain); + e->chain = NULL; + } ttm_eu_backoff_reservation(&p->ticket, &p->validated); + } out: return r; } @@ -672,12 +667,12 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) } /** - * cs_parser_fini() - clean parser states + * amdgpu_cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. * @error: error number * @backoff: indicator to backoff the reservation * - * If error is set than unvalidate buffer, otherwise just free memory + * If error is set then unvalidate buffer, otherwise just free memory * used by parsing context. 
**/ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, @@ -685,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { + struct amdgpu_bo_list_entry *e; + + amdgpu_bo_list_for_each_entry(e, parser->bo_list) { + dma_fence_chain_free(e->chain); + e->chain = NULL; + } + ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -796,7 +799,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); if (r) return r; @@ -807,7 +810,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; @@ -826,7 +829,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; @@ -847,7 +850,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); + p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); if (amdgpu_vm_debug) { /* Invalidate all BOs to test for userspace bugs */ @@ -1124,7 +1127,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p dep->chain = NULL; if (syncobj_deps[i].point) { - dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL); + dep->chain = dma_fence_chain_alloc(); if (!dep->chain) return -ENOMEM; } @@ -1132,7 +1135,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p dep->syncobj = drm_syncobj_find(p->filp, syncobj_deps[i].handle); if (!dep->syncobj) { - kfree(dep->chain); + dma_fence_chain_free(dep->chain); return -EINVAL; } dep->point = syncobj_deps[i].point; @@ -1260,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct dma_resv *resv = e->tv.bo->base.resv; + struct dma_fence_chain *chain = e->chain; + + if (!chain) + continue; + + /* + * Work around dma_resv shortcommings by wrapping up the + * submission in a dma_fence_chain and add it as exclusive + * fence, but first add the submission as shared fence to make + * sure that shared fences never signal before the exclusive + * one. 
+ */ + dma_fence_chain_init(chain, dma_resv_excl_fence(resv), + dma_fence_get(p->fence), 1); + + dma_resv_add_shared_fence(resv, p->fence); + rcu_assign_pointer(resv->fence_excl, &chain->base); + e->chain = NULL; + } + ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); @@ -1488,7 +1513,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, } /** - * amdgpu_cs_wait_all_fence - wait on all fences to signal + * amdgpu_cs_wait_all_fences - wait on all fences to signal * * @adev: amdgpu device * @filp: file private @@ -1639,7 +1664,7 @@ err_free_fences: } /** - * amdgpu_cs_find_bo_va - find bo_va for VM address + * amdgpu_cs_find_mapping - find bo_va for VM address * * @parser: command submission parser context * @addr: VM address diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6819fe5612d9..e7a010b7ca1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -331,10 +331,13 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } +#define AMDGPU_RAS_COUNTE_DELAY_MS 3000 + static int amdgpu_ctx_query2(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv, uint32_t id, - union drm_amdgpu_ctx_out *out) + struct amdgpu_fpriv *fpriv, uint32_t id, + union drm_amdgpu_ctx_out *out) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr; @@ -361,6 +364,30 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; + if (adev->ras_enabled && con) { + /* Return the cached values in O(1), + * and schedule delayed work to cache + * new vaues. 
+ */ + int ce_count, ue_count; + + ce_count = atomic_read(&con->ras_ce_count); + ue_count = atomic_read(&con->ras_ue_count); + + if (ce_count != ctx->ras_counter_ce) { + ctx->ras_counter_ce = ce_count; + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE; + } + + if (ue_count != ctx->ras_counter_ue) { + ctx->ras_counter_ue = ue_count; + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE; + } + + schedule_delayed_work(&con->ras_counte_delay_work, + msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS)); + } + mutex_unlock(&mgr->lock); return 0; } @@ -635,3 +662,81 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) idr_destroy(&mgr->ctx_handles); mutex_destroy(&mgr->lock); } + +static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, + struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max) +{ + ktime_t now, t1; + uint32_t i; + + *total = *max = 0; + + now = ktime_get(); + for (i = 0; i < amdgpu_sched_jobs; i++) { + struct dma_fence *fence; + struct drm_sched_fence *s_fence; + + spin_lock(&ctx->ring_lock); + fence = dma_fence_get(centity->fences[i]); + spin_unlock(&ctx->ring_lock); + if (!fence) + continue; + s_fence = to_drm_sched_fence(fence); + if (!dma_fence_is_signaled(&s_fence->scheduled)) { + dma_fence_put(fence); + continue; + } + t1 = s_fence->scheduled.timestamp; + if (!ktime_before(t1, now)) { + dma_fence_put(fence); + continue; + } + if (dma_fence_is_signaled(&s_fence->finished) && + s_fence->finished.timestamp < now) + *total += ktime_sub(s_fence->finished.timestamp, t1); + else + *total += ktime_sub(now, t1); + t1 = ktime_sub(now, t1); + dma_fence_put(fence); + *max = max(t1, *max); + } +} + +ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip, + uint32_t idx, uint64_t *elapsed) +{ + struct idr *idp; + struct amdgpu_ctx *ctx; + uint32_t id; + struct amdgpu_ctx_entity *centity; + ktime_t total = 0, max = 0; + + if (idx >= AMDGPU_MAX_ENTITY_NUM) + return 0; + idp = &mgr->ctx_handles; + mutex_lock(&mgr->lock); + idr_for_each_entry(idp, ctx, id) { + ktime_t ttotal, tmax; + + if (!ctx->entities[hwip][idx]) + continue; + + centity = ctx->entities[hwip][idx]; + amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax); + + /* Harmonic mean approximation diverges for very small + * values. If ratio < 0.01% ignore + */ + if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal)) + continue; + + total = ktime_add(total, ttotal); + max = ktime_after(tmax, max) ? 
tmax : max; + } + + mutex_unlock(&mgr->lock); + if (elapsed) + *elapsed = max; + + return total; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index f54e10314661..14db16bc3322 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -30,6 +30,7 @@ struct drm_file; struct amdgpu_fpriv; #define AMDGPU_MAX_ENTITY_NUM 4 +#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total)) struct amdgpu_ctx_entity { uint64_t sequence; @@ -87,5 +88,6 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); - +ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip, + uint32_t idx, uint64_t *elapsed); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index bcaf271b39bf..277128846dd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -990,7 +990,7 @@ err: } /** - * amdgpu_debugfs_regs_gfxoff_write - Enable/disable GFXOFF + * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF * * @f: open file handle * @buf: User buffer to write data from @@ -1041,7 +1041,7 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu /** - * amdgpu_debugfs_regs_gfxoff_status - read gfxoff status + * amdgpu_debugfs_gfxoff_read - read gfxoff status * * @f: open file handle * @buf: User buffer to store read data in @@ -1304,11 +1304,11 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) seq_printf(m, "pid:%d\tProcess:%s ----------\n", vm->task_info.pid, vm->task_info.process_name); - r = amdgpu_bo_reserve(vm->root.base.bo, true); + r = amdgpu_bo_reserve(vm->root.bo, true); if (r) break; amdgpu_debugfs_vm_bo_info(vm, m); - amdgpu_bo_unreserve(vm->root.base.bo); + amdgpu_bo_unreserve(vm->root.bo); } mutex_unlock(&dev->filelist_mutex); @@ -1414,7 +1414,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && job->fence == fence) + if (preempted && (&job->hw_fence) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 57ec108b5972..41c6b3aacd37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -71,6 +71,8 @@ #include <drm/task_barrier.h> #include <linux/pm_runtime.h> +#include <drm/drm_drv.h> + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -82,6 +84,7 @@ MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -113,12 +116,15 @@ const char *amdgpu_asic_name[] = { "RENOIR", "ALDEBARAN", "NAVI10", + "CYAN_SKILLFISH", "NAVI14", "NAVI12", "SIENNA_CICHLID", "NAVY_FLOUNDER", "VANGOGH", "DIMGREY_CAVEFISH", + "BEIGE_GOBY", + "YELLOW_CARP", "LAST", }; @@ -262,12 +268,27 @@ bool amdgpu_device_supports_baco(struct drm_device *dev) return amdgpu_asic_supports_baco(adev); } +/** + * amdgpu_device_supports_smart_shift - Is the 
device dGPU with + * smart shift support + * + * @dev: drm_device pointer + * + * Returns true if the device is a dGPU with Smart Shift support, + * otherwise returns false. + */ +bool amdgpu_device_supports_smart_shift(struct drm_device *dev) +{ + return (amdgpu_device_supports_boco(dev) && + amdgpu_acpi_is_power_shift_control_supported()); +} + /* * VRAM access helper functions */ /** - * amdgpu_device_vram_access - read/write a buffer in vram + * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -275,54 +296,107 @@ bool amdgpu_device_supports_baco(struct drm_device *dev) * @size: read/write size, sizeof(@buf) must > @size * @write: true - write to vram, otherwise - read from vram */ -void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, - uint32_t *buf, size_t size, bool write) +void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) { unsigned long flags; - uint32_t hi = ~0; + uint32_t hi = ~0, tmp = 0; + uint32_t *data = buf; uint64_t last; + int idx; + + if (!drm_dev_enter(&adev->ddev, &idx)) + return; + + BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4)); + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + for (last = pos + size; pos < last; pos += 4) { + tmp = pos >> 31; + + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); + if (tmp != hi) { + WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); + hi = tmp; + } + if (write) + WREG32_NO_KIQ(mmMM_DATA, *data++); + else + *data++ = RREG32_NO_KIQ(mmMM_DATA); + } + + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + drm_dev_exit(idx); +} +/** + * amdgpu_device_vram_access - access vram by vram aperature + * + * @adev: amdgpu_device pointer + * @pos: offset of the buffer in vram + * @buf: virtual address of the buffer in system memory + * @size: read/write size, sizeof(@buf) must > @size + * @write: true - write to vram, otherwise - read from vram + * + * The return value means how many bytes have been transferred. 
+ */ +size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) +{ #ifdef CONFIG_64BIT + void __iomem *addr; + size_t count = 0; + uint64_t last; + + if (!adev->mman.aper_base_kaddr) + return 0; + last = min(pos + size, adev->gmc.visible_vram_size); if (last > pos) { - void __iomem *addr = adev->mman.aper_base_kaddr + pos; - size_t count = last - pos; + addr = adev->mman.aper_base_kaddr + pos; + count = last - pos; if (write) { memcpy_toio(addr, buf, count); mb(); - amdgpu_asic_flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); } else { - amdgpu_asic_invalidate_hdp(adev, NULL); + amdgpu_device_invalidate_hdp(adev, NULL); mb(); memcpy_fromio(buf, addr, count); } - if (count == size) - return; - - pos += count; - buf += count / 4; - size -= count; } + + return count; +#else + return 0; #endif +} - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - for (last = pos + size; pos < last; pos += 4) { - uint32_t tmp = pos >> 31; +/** + * amdgpu_device_vram_access - read/write a buffer in vram + * + * @adev: amdgpu_device pointer + * @pos: offset of the buffer in vram + * @buf: virtual address of the buffer in system memory + * @size: read/write size, sizeof(@buf) must > @size + * @write: true - write to vram, otherwise - read from vram + */ +void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) +{ + size_t count; - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); - if (tmp != hi) { - WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); - hi = tmp; - } - if (write) - WREG32_NO_KIQ(mmMM_DATA, *buf++); - else - *buf++ = RREG32_NO_KIQ(mmMM_DATA); + /* try to using vram apreature to access vram first */ + count = amdgpu_device_aper_access(adev, pos, buf, size, write); + size -= count; + if (size) { + /* using MM to access rest vram */ + pos += count; + buf += count; + amdgpu_device_mm_access(adev, pos, buf, size, write); } - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); } /* @@ -332,7 +406,7 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, /* Check if hw access should be skipped because of hotplug or device error */ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) { - if (adev->in_pci_err_recovery) + if (adev->no_hw_access) return true; #ifdef CONFIG_LOCKDEP @@ -490,7 +564,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->is_rlcg_access_range) { if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) - return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0); + return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); } @@ -1238,15 +1312,16 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) /** * amdgpu_device_vga_set_decode - enable/disable vga decode * - * @cookie: amdgpu_device pointer + * @pdev: PCI device pointer * @state: enable/disable vga decode * * Enable/disable vga decode (all asics). * Returns VGA resource flags. 
*/ -static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) +static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, + bool state) { - struct amdgpu_device *adev = cookie; + struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev)); amdgpu_asic_set_vga_state(adev, state); if (state) return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | @@ -1341,6 +1416,42 @@ def_value: adev->pm.smu_prv_buffer_size = 0; } +static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) +{ + if (!(adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_RAVEN) + return 0; + + switch (adev->asic_type) { + case CHIP_RAVEN: + if (adev->pdev->device == 0x15dd) + adev->apu_flags |= AMD_APU_IS_RAVEN; + if (adev->pdev->device == 0x15d8) + adev->apu_flags |= AMD_APU_IS_PICASSO; + break; + case CHIP_RENOIR: + if ((adev->pdev->device == 0x1636) || + (adev->pdev->device == 0x164c)) + adev->apu_flags |= AMD_APU_IS_RENOIR; + else + adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; + break; + case CHIP_VANGOGH: + adev->apu_flags |= AMD_APU_IS_VANGOGH; + break; + case CHIP_YELLOW_CARP: + break; + case CHIP_CYAN_SKILLFISH: + if (adev->pdev->device == 0x13FE) + adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; + break; + default: + return -EINVAL; + } + + return 0; +} + /** * amdgpu_device_check_arguments - validate module params * @@ -1820,6 +1931,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: default: return 0; case CHIP_VEGA10: @@ -1857,6 +1969,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_VANGOGH: chip_name = "vangogh"; break; + case CHIP_YELLOW_CARP: + chip_name = "yellow_carp"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -2033,9 +2148,14 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: if (adev->asic_type == CHIP_VANGOGH) adev->family = AMDGPU_FAMILY_VGH; + else if (adev->asic_type == CHIP_YELLOW_CARP) + adev->family = AMDGPU_FAMILY_YC; else adev->family = AMDGPU_FAMILY_NV; @@ -2571,34 +2691,26 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_device_ip_fini - run fini for hardware IPs - * - * @adev: amdgpu_device pointer - * - * Main teardown pass for hardware IPs. The list of all the hardware - * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks - * are run. hw_fini tears down the hardware associated with each IP - * and sw_fini tears down any software state associated with each IP. - * Returns 0 on success, negative error code on failure. 
- */ -static int amdgpu_device_ip_fini(struct amdgpu_device *adev) +static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) { int i, r; - if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) - amdgpu_virt_release_ras_err_handler_data(adev); + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].version->funcs->early_fini) + continue; - amdgpu_ras_pre_fini(adev); + r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); + if (r) { + DRM_DEBUG("early_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + } + } - if (adev->gmc.xgmi.num_physical_nodes > 1) - amdgpu_xgmi_remove_device(adev); + amdgpu_amdkfd_suspend(adev, false); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_amdkfd_device_fini(adev); - /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) @@ -2629,6 +2741,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } + return 0; +} + +/** + * amdgpu_device_ip_fini - run fini for hardware IPs + * + * @adev: amdgpu_device pointer + * + * Main teardown pass for hardware IPs. The list of all the hardware + * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks + * are run. hw_fini tears down the hardware associated with each IP + * and sw_fini tears down any software state associated with each IP. + * Returns 0 on success, negative error code on failure. + */ +static int amdgpu_device_ip_fini(struct amdgpu_device *adev) +{ + int i, r; + + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) + amdgpu_virt_release_ras_err_handler_data(adev); + + amdgpu_ras_pre_fini(adev); + + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_remove_device(adev); + + amdgpu_amdkfd_device_fini_sw(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) @@ -2690,12 +2829,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); - mutex_lock(&adev->gfx.gfx_off_mutex); - if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) - adev->gfx.gfx_off_state = true; - } - mutex_unlock(&adev->gfx.gfx_off_mutex); + WARN_ON_ONCE(adev->gfx.gfx_off_state); + WARN_ON_ONCE(adev->gfx.gfx_off_req_count); + + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; } /** @@ -2856,7 +2994,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_IH, }; - for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + for (i = 0; i < adev->num_ip_blocks; i++) { int j; struct amdgpu_ip_block *block; @@ -3034,7 +3172,7 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev)) { if (adev->is_atom_fw) { - if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) + if (amdgpu_atomfirmware_gpu_virtualization_supported(adev)) adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; } else { if (amdgpu_atombios_has_gpu_virtualization_table(adev)) @@ -3097,7 +3235,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: #endif return amdgpu_dc != 0; #endif 
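The reworked amdgpu_device_delay_enable_gfx_off() above no longer takes gfx_off_mutex or re-checks gfx_off_state and gfx_off_req_count; it asserts both with WARN_ON_ONCE() and simply asks the SMU to power-gate GFX again. That presumes an invariant kept by the callers: the delayed work is only ever queued while the request count is zero, and is cancelled before the count is raised. Below is a minimal, self-contained sketch of that request-count-plus-delayed-work pattern, assuming generic names (feature_state, feature_ctrl(), feature_delay_enable()); it is not the driver's actual amdgpu_gfx_off_ctrl() path, which lies outside this diff.

#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct feature_state {
	struct mutex lock;			/* serializes requesters */
	struct delayed_work enable_work;	/* re-enables the feature when idle */
	unsigned int disable_req_count;		/* outstanding "keep it off" requests */
	bool enabled;
};

/* Runs only when no disable request is pending: requesters cancel the work
 * under the lock before bumping the count, so the handler can assert its
 * preconditions instead of re-taking the lock. */
static void feature_delay_enable(struct work_struct *work)
{
	struct feature_state *st =
		container_of(work, struct feature_state, enable_work.work);

	WARN_ON_ONCE(st->enabled);
	WARN_ON_ONCE(st->disable_req_count);
	st->enabled = true;			/* e.g. allow power-gating again */
}

static void feature_ctrl(struct feature_state *st, bool allow)
{
	mutex_lock(&st->lock);

	if (!allow) {
		if (st->disable_req_count++ == 0) {
			/* first requester: make sure the re-enable work is
			 * neither pending nor running, then disable now */
			cancel_delayed_work_sync(&st->enable_work);
			st->enabled = false;
		}
	} else if (!WARN_ON_ONCE(st->disable_req_count == 0) &&
		   --st->disable_req_count == 0) {
		/* last requester gone: re-enable after a grace period */
		schedule_delayed_work(&st->enable_work,
				      msecs_to_jiffies(100));
	}

	mutex_unlock(&st->lock);
}

static void feature_state_init(struct feature_state *st)
{
	mutex_init(&st->lock);
	INIT_DELAYED_WORK(&st->enable_work, feature_delay_enable);
	st->disable_req_count = 0;
	st->enabled = true;
}

The handler stays lock-free because its assertions encode an invariant the callers maintain, and cancel_delayed_work_sync() is safe under the mutex only because the handler never takes it.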
@@ -3181,8 +3321,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) int ret = 0; /* - * By default timeout for non compute jobs is 10000. - * And there is no timeout enforced on compute jobs. + * By default timeout for non compute jobs is 10000 + * and 60000 for compute jobs. * In SR-IOV or passthrough mode, timeout for compute * jobs are 60000 by default. */ @@ -3191,10 +3331,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? msecs_to_jiffies(60000) : msecs_to_jiffies(10000); - else if (amdgpu_passthrough(adev)) - adev->compute_timeout = msecs_to_jiffies(60000); else - adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + adev->compute_timeout = msecs_to_jiffies(60000); if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { while ((timeout_setting = strsep(&input, ",")) && @@ -3251,7 +3389,6 @@ static const struct attribute *amdgpu_dev_attributes[] = { NULL }; - /** * amdgpu_device_init - initialize the driver * @@ -3332,6 +3469,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); + r = amdgpu_device_init_apu_flags(adev); + if (r) + return r; + r = amdgpu_device_check_arguments(adev); if (r) return r; @@ -3414,13 +3555,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - goto failed_unmap; + return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - goto failed_unmap; + return r; /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -3505,9 +3646,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, fence_driver_init: /* Fence driver */ - r = amdgpu_fence_driver_init(adev); + r = amdgpu_fence_driver_sw_init(adev); if (r) { - dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); + dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); goto failed; } @@ -3534,6 +3675,8 @@ fence_driver_init: goto release_ras_con; } + amdgpu_fence_driver_hw_init(adev); + dev_info(adev->dev, "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", adev->gfx.config.max_shader_engines, @@ -3625,7 +3768,7 @@ fence_driver_init: /* this will fail for cards that aren't VGA class devices, just * ignore it */ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) - vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); + vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); if (amdgpu_device_supports_px(ddev)) { px = true; @@ -3646,11 +3789,28 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); -failed_unmap: + return r; +} + +static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) +{ + /* Clear all CPU mappings pointing to this device */ + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); + + /* Unmap all mapped bars - Doorbell, registers and VRAM */ + amdgpu_device_doorbell_fini(adev); + iounmap(adev->rmmio); adev->rmmio = NULL; + if (adev->mman.aper_base_kaddr) + iounmap(adev->mman.aper_base_kaddr); + adev->mman.aper_base_kaddr = NULL; - return r; + /* Memory manager related */ + if (!adev->gmc.xgmi.connected_to_cpu) { + arch_phys_wc_del(adev->gmc.vram_mtrr); + arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); + } } /** @@ -3661,15 +3821,16 @@ failed_unmap: * Tear 
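Earlier in this amdgpu_device.c diff, debugfs-style VRAM access is split in two: amdgpu_device_aper_access() copies through the CPU-visible BAR with memcpy_toio()/memcpy_fromio(), and amdgpu_device_mm_access() drives the MM_INDEX/MM_INDEX_HI/MM_DATA register window (low 31 bits of the offset with bit 31 set in MM_INDEX, remaining high bits in MM_INDEX_HI) for memory beyond the visible range; amdgpu_device_vram_access() now chains the two. A hypothetical caller, assuming nothing beyond the signature shown in the hunk, could look like the sketch below; dump_vram_dwords() is an invented name, and the offset and size are kept 4-byte aligned to satisfy the BUG_ON() in amdgpu_device_mm_access().

#include "amdgpu.h"

/* Illustrative debug helper: read 64 dwords starting at VRAM offset 0x1000
 * into a stack buffer. The helper tries the visible-VRAM aperture first and
 * falls back to the MM register window for anything outside of it. */
static void dump_vram_dwords(struct amdgpu_device *adev)
{
	uint32_t buf[64];	/* 256 bytes, 4-byte aligned */

	/* write = false: copy from VRAM into buf */
	amdgpu_device_vram_access(adev, 0x1000, buf, sizeof(buf), false);

	/* buf[0]..buf[63] now hold the VRAM contents at offset 0x1000 */
}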
down the driver info (all asics). * Called at driver shutdown. */ -void amdgpu_device_fini(struct amdgpu_device *adev) +void amdgpu_device_fini_hw(struct amdgpu_device *adev) { dev_info(adev->dev, "amdgpu: finishing device.\n"); flush_delayed_work(&adev->delayed_init_work); - ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + if (adev->mman.initialized) { + flush_delayed_work(&adev->mman.bdev.wq); + ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + } adev->shutdown = true; - kfree(adev->pci_state); - /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test * */ @@ -3686,11 +3847,29 @@ void amdgpu_device_fini(struct amdgpu_device *adev) else drm_atomic_helper_shutdown(adev_to_drm(adev)); } - amdgpu_fence_driver_fini(adev); + amdgpu_fence_driver_hw_fini(adev); + if (adev->pm_sysfs_en) amdgpu_pm_sysfs_fini(adev); + if (adev->ucode_sysfs_en) + amdgpu_ucode_sysfs_fini(adev); + sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); + amdgpu_fbdev_fini(adev); + + amdgpu_irq_fini_hw(adev); + + amdgpu_device_ip_fini_early(adev); + + amdgpu_gart_dummy_page_fini(adev); + + amdgpu_device_unmap_mmio(adev); +} + +void amdgpu_device_fini_sw(struct amdgpu_device *adev) +{ amdgpu_device_ip_fini(adev); + amdgpu_fence_driver_sw_fini(adev); release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; adev->accel_working = false; @@ -3711,19 +3890,15 @@ void amdgpu_device_fini(struct amdgpu_device *adev) vga_switcheroo_fini_domain_pm_ops(adev->dev); } if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) - vga_client_register(adev->pdev, NULL, NULL, NULL); - iounmap(adev->rmmio); - adev->rmmio = NULL; - amdgpu_device_doorbell_fini(adev); + vga_client_unregister(adev->pdev); - if (adev->ucode_sysfs_en) - amdgpu_ucode_sysfs_fini(adev); - - sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); if (adev->mman.discovery_bin) amdgpu_discovery_fini(adev); + + kfree(adev->pci_state); + } @@ -3743,12 +3918,15 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev = drm_to_adev(dev); - int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; adev->in_suspend = true; + + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) + DRM_WARN("smart shift update failed\n"); + drm_kms_helper_poll_disable(dev); if (fbcon) @@ -3758,7 +3936,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); - r = amdgpu_device_ip_suspend_phase1(adev); + amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); @@ -3766,9 +3944,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) /* evict vram memory */ amdgpu_bo_evict_vram(adev); - amdgpu_fence_driver_suspend(adev); + amdgpu_fence_driver_hw_fini(adev); - r = amdgpu_device_ip_suspend_phase2(adev); + amdgpu_device_ip_suspend_phase2(adev); /* evict remaining vram memory * This second call to evict vram is to evict the gart page table * using the CPU. 
@@ -3811,8 +3989,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); return r; } - amdgpu_fence_driver_resume(adev); - + amdgpu_fence_driver_hw_init(adev); r = amdgpu_device_ip_late_init(adev); if (r) @@ -3858,6 +4035,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) #endif adev->in_suspend = false; + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) + DRM_WARN("smart shift update failed\n"); + return 0; } @@ -4031,6 +4211,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) { struct dma_fence *fence = NULL, *next = NULL; struct amdgpu_bo *shadow; + struct amdgpu_bo_vm *vmbo; long r = 1, tmo; if (amdgpu_sriov_runtime(adev)) @@ -4040,12 +4221,12 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) dev_info(adev->dev, "recover vram bo from shadow start\n"); mutex_lock(&adev->shadow_list_lock); - list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { - + list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { + shadow = &vmbo->bo; /* No need to recover an evicted BO */ - if (shadow->tbo.mem.mem_type != TTM_PL_TT || - shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || - shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) + if (shadow->tbo.resource->mem_type != TTM_PL_TT || + shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || + shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) continue; r = amdgpu_bo_restore_shadow(shadow, &next); @@ -4210,6 +4391,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: case CHIP_VANGOGH: case CHIP_ALDEBARAN: break; @@ -4268,7 +4450,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, r = 0; + int i, j, r = 0; struct amdgpu_job *job = NULL; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -4292,11 +4474,22 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (!ring || !ring->sched.thread) continue; + /*clear job fence from fence drv to avoid force_completion + *leave NULL and vm flush fence in fence drv */ + for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) { + struct dma_fence *old, **ptr; + + ptr = &ring->fence_drv.fences[j]; + old = rcu_dereference_protected(*ptr, 1); + if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) { + RCU_INIT_POINTER(*ptr, NULL); + } + } /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } - if(job) + if (job && job->vm) drm_sched_increase_karma(&job->base); r = amdgpu_reset_prepare_hwcontext(adev, reset_context); @@ -4634,7 +4827,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) return 0; } -void amdgpu_device_recheck_guilty_jobs( +static void amdgpu_device_recheck_guilty_jobs( struct amdgpu_device *adev, struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { @@ -4760,7 +4953,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", job ? 
job->base.id : -1, hive->hive_id); amdgpu_put_xgmi_hive(hive); - if (job) + if (job && job->vm) drm_sched_increase_karma(&job->base); return 0; } @@ -4784,7 +4977,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, job ? job->base.id : -1); /* even we skipped this reset, still need to set the job to guilty */ - if (job) + if (job && job->vm) drm_sched_increase_karma(&job->base); goto skip_recovery; } @@ -4937,6 +5130,8 @@ skip_hw_reset: amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); + if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0)) + DRM_WARN("smart shift update failed\n"); } } @@ -5125,7 +5320,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (!amdgpu_device_supports_baco(adev_to_drm(adev))) return -ENOTSUPP; - if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) + if (ras && adev->ras_enabled && + adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); return amdgpu_dpm_baco_enter(adev); @@ -5144,9 +5340,14 @@ int amdgpu_device_baco_exit(struct drm_device *dev) if (ret) return ret; - if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) + if (ras && adev->ras_enabled && + adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); + if (amdgpu_passthrough(adev) && + adev->nbio.funcs->clear_doorbell_interrupt) + adev->nbio.funcs->clear_doorbell_interrupt(adev); + return 0; } @@ -5290,9 +5491,9 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - adev->in_pci_err_recovery = true; + adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(adev, &reset_context); - adev->in_pci_err_recovery = false; + adev->no_hw_access = false; if (r) goto out; @@ -5387,4 +5588,31 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev) return true; } +void amdgpu_device_flush_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ +#ifdef CONFIG_X86_64 + if (adev->flags & AMD_IS_APU) + return; +#endif + if (adev->gmc.xgmi.connected_to_cpu) + return; + + if (ring && ring->funcs->emit_hdp_flush) + amdgpu_ring_emit_hdp_flush(ring); + else + amdgpu_asic_flush_hdp(adev, ring); +} +void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ +#ifdef CONFIG_X86_64 + if (adev->flags & AMD_IS_APU) + return; +#endif + if (adev->gmc.xgmi.connected_to_cpu) + return; + + amdgpu_asic_invalidate_hdp(adev, ring); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index e1b6f5891759..ada7bc19118a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -299,6 +299,9 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) ip->major, ip->minor, ip->revision); + if (le16_to_cpu(ip->hw_id) == VCN_HWID) + adev->vcn.num_vcn_inst++; + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, @@ -325,7 +328,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) return 0; } -int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, +int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance, int *major, int *minor, int *revision) { struct 
binary_header *bhdr; @@ -357,7 +360,7 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, for (j = 0; j < num_ips; j++) { ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); - if (le16_to_cpu(ip->hw_id) == hw_id) { + if ((le16_to_cpu(ip->hw_id) == hw_id) && (ip->number_instance == number_instance)) { if (major) *major = ip->major; if (minor) @@ -373,11 +376,19 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, return -EINVAL; } + +int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance, + int *major, int *minor, int *revision) +{ + return amdgpu_discovery_get_ip_version(adev, VCN_HWID, + vcn_instance, major, minor, revision); +} + void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { struct binary_header *bhdr; struct harvest_table *harvest_info; - int i; + int i, vcn_harvest_count = 0; bhdr = (struct binary_header *)adev->mman.discovery_bin; harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + @@ -389,8 +400,7 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) switch (le32_to_cpu(harvest_info->list[i].hw_id)) { case VCN_HWID: - adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; - adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + vcn_harvest_count++; break; case DMU_HWID: adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; @@ -399,6 +409,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) break; } } + if (vcn_harvest_count == adev->vcn.num_vcn_inst) { + adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; + adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + } } int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 1b1ae21b1037..48e6b88cfdfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -30,8 +30,11 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev); -int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, +int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance, int *major, int *minor, int *revision); + +int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance, + int *major, int *minor, int *revision); int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 8a1fb8b6606e..7a7316731911 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -203,9 +203,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - r = dma_resv_get_fences_rcu(new_abo->tbo.base.resv, &work->excl, - &work->shared_count, - &work->shared); + r = dma_resv_get_fences(new_abo->tbo.base.resv, &work->excl, + &work->shared_count, &work->shared); if (unlikely(r != 0)) { DRM_ERROR("failed to get fences for buffer\n"); goto unpin; @@ -523,6 +522,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, break; case CHIP_RENOIR: case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: domain |= AMDGPU_GEM_DOMAIN_GTT; break; @@ -1047,17 +1047,18 @@ int amdgpu_display_gem_fb_init(struct drm_device *dev, rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, 
&rfb->base, mode_cmd); - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + + ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); if (ret) goto err; - ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); + ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); if (ret) goto err; return 0; err: - drm_err(dev, "Failed to init gem fb: %d\n", ret); + drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret); rfb->base.obj[0] = NULL; return ret; } @@ -1071,18 +1072,12 @@ int amdgpu_display_gem_fb_verify_and_init( rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); - if (ret) - goto err; /* Verify that the modifier is supported. */ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { - struct drm_format_name_buf format_name; drm_dbg_kms(dev, - "unsupported pixel format %s / modifier 0x%llx\n", - drm_get_format_name(mode_cmd->pixel_format, - &format_name), - mode_cmd->modifier[0]); + "unsupported pixel format %p4cc / modifier 0x%llx\n", + &mode_cmd->pixel_format, mode_cmd->modifier[0]); ret = -EINVAL; goto err; @@ -1092,9 +1087,13 @@ int amdgpu_display_gem_fb_verify_and_init( if (ret) goto err; + ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + if (ret) + goto err; + return 0; err: - drm_err(dev, "Failed to verify and init gem fb: %d\n", ret); + drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret); rfb->base.obj[0] = NULL; return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index baa980a477d9..ae6ab93c868b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,94 +43,6 @@ #include <linux/pm_runtime.h> /** - * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation - * @obj: GEM BO - * @vma: Virtual memory area - * - * Sets up a userspace mapping of the BO's memory in the given - * virtual memory area. - * - * Returns: - * 0 on success or a negative error code on failure. - */ -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - unsigned asize = amdgpu_bo_size(bo); - int ret; - - if (!vma->vm_file) - return -ENODEV; - - if (adev == NULL) - return -ENODEV; - - /* Check for valid size. */ - if (asize < vma->vm_end - vma->vm_start) - return -EINVAL; - - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || - (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { - return -EPERM; - } - vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT; - - /* prime mmap does not need to check access, so allow here */ - ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data); - if (ret) - return ret; - - ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev); - drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data); - - return ret; -} - -static int -__dma_resv_make_exclusive(struct dma_resv *obj) -{ - struct dma_fence **fences; - unsigned int count; - int r; - - if (!dma_resv_get_list(obj)) /* no shared fences to convert */ - return 0; - - r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences); - if (r) - return r; - - if (count == 0) { - /* Now that was unexpected. 
*/ - } else if (count == 1) { - dma_resv_add_excl_fence(obj, fences[0]); - dma_fence_put(fences[0]); - kfree(fences); - } else { - struct dma_fence_array *array; - - array = dma_fence_array_create(count, fences, - dma_fence_context_alloc(1), 0, - false); - if (!array) - goto err_fences_put; - - dma_resv_add_excl_fence(obj, &array->base); - dma_fence_put(&array->base); - } - - return 0; - -err_fences_put: - while (count--) - dma_fence_put(fences[count]); - kfree(fences); - return -ENOMEM; -} - -/** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * * @dmabuf: DMA-buf where we attach to @@ -156,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, if (r < 0) goto out; - r = amdgpu_bo_reserve(bo, false); - if (unlikely(r != 0)) - goto out; - - /* - * We only create shared fences for internal use, but importers - * of the dmabuf rely on exclusive fences for implicitly - * tracking write hazards. As any of the current fences may - * correspond to a write, we need to convert all existing - * fences on the reservation object into a single exclusive - * fence. - */ - r = __dma_resv_make_exclusive(bo->tbo.base.resv); - if (r) - goto out; - - bo->prime_shared_count++; - amdgpu_bo_unreserve(bo); return 0; out: @@ -196,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) - bo->prime_shared_count--; - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } @@ -214,9 +105,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int r; /* pin buffer into GTT */ - return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + return r; + + if (bo->tbo.moving) { + r = dma_fence_wait(bo->tbo.moving, true); + if (r) { + amdgpu_bo_unpin(bo); + return r; + } + } + return 0; } /** @@ -272,12 +175,12 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, if (r) return ERR_PTR(r); - } else if (!(amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type) & + } else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) & AMDGPU_GEM_DOMAIN_GTT)) { return ERR_PTR(-EBUSY); } - switch (bo->tbo.mem.mem_type) { + switch (bo->tbo.resource->mem_type) { case TTM_PL_TT: sgt = drm_prime_pages_to_sg(obj->dev, bo->tbo.ttm->pages, @@ -291,8 +194,9 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: - r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, 0, - bo->tbo.base.size, attach->dev, dir, &sgt); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, + bo->tbo.base.size, attach->dev, + dir, &sgt); if (r) return ERR_PTR(r); break; @@ -451,8 +355,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) bo = gem_to_amdgpu_bo(gobj); bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; - if (dma_buf->ops != &amdgpu_dmabuf_ops) - bo->prime_shared_count = 1; dma_resv_unlock(resv); return gobj; @@ -482,7 +384,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; - if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; r = ttm_bo_validate(&bo->tbo, &placement, &ctx); @@ 
-493,7 +395,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; + struct dma_resv *resv = vm->root.bo->tbo.base.resv; if (ticket) { /* When we get an error here it means that somebody diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h index 39b5b9616fd8..3e93b9b407a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -31,8 +31,6 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct amdgpu_bo *bo); -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma); extern const struct dma_buf_ops amdgpu_dmabuf_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f93883db2b46..f18240f87387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,6 +23,7 @@ */ #include <drm/amdgpu_drm.h> +#include <drm/drm_aperture.h> #include <drm/drm_drv.h> #include <drm/drm_gem.h> #include <drm/drm_vblank.h> @@ -42,7 +43,7 @@ #include "amdgpu_irq.h" #include "amdgpu_dma_buf.h" #include "amdgpu_sched.h" - +#include "amdgpu_fdinfo.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" @@ -94,9 +95,10 @@ * - 3.39.0 - DMABUF implicit sync does a full pipeline sync * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ * - 3.41.0 - Add video codec query + * - 3.42.0 - Add 16bpc fixed point display support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 41 +#define KMS_DRIVER_MINOR 42 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -158,6 +160,7 @@ int amdgpu_smu_pptable_id = -1; * highest. That helps saving some idle power. * DISABLE_FRACTIONAL_PWM (bit 2) disabled by default * PSR (bit 3) disabled by default + * EDP NO POWER SEQUENCING (bit 4) disabled by default */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; @@ -171,6 +174,7 @@ int amdgpu_tmz = -1; /* auto */ uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; +int amdgpu_smartshift_bias; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -287,9 +291,9 @@ module_param_named(msi, amdgpu_msi, int, 0444); * for SDMA and Video. * * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) - * jobs is 10000. And there is no timeout enforced on compute jobs. + * jobs is 10000. The timeout for compute is 60000. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; " +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; " "for passthrough or sriov, 10000 for all jobs." " 0: keep default value. negative: infinity timeout), " "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " @@ -640,7 +644,8 @@ module_param_named(mes, amdgpu_mes, int, 0444); /** * DOC: noretry (int) - * Disable retry faults in the GPU memory controller. + * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that + * do not support per-process XNACK this also disables retry page faults. 
* (0 = retry enabled, 1 = retry disabled, -1 auto (default)) */ MODULE_PARM_DESC(noretry, @@ -833,8 +838,23 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); /** * DOC: freesync_video (uint) - * Enabled the optimization to adjust front porch timing to achieve seamless mode change experience - * when setting a freesync supported mode for which full modeset is not needed. + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * * The default value: 0 (off). */ MODULE_PARM_DESC( @@ -850,11 +870,10 @@ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legac module_param_named(reset_method, amdgpu_reset_method, int, 0444); /** - * DOC: bad_page_threshold (int) - * Bad page threshold is to specify the threshold value of faulty pages - * detected by RAS ECC, that may result in GPU entering bad status if total - * faulty pages by ECC exceed threshold value and leave it for user's further - * check. + * DOC: bad_page_threshold (int) Bad page threshold is specifies the + * threshold value of faulty pages detected by RAS ECC, which may + * result in the GPU entering bad status when the number of total + * faulty pages by ECC exceeds the threshold value. 
*/ MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)"); module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); @@ -1148,6 +1167,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ + {0x1002, 0x15E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, @@ -1161,7 +1181,12 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73AC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73AD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, @@ -1169,22 +1194,50 @@ static const struct pci_device_id pciidlist[] = { /* Van Gogh */ {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + /* Yellow Carp */ + {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + /* Navy_Flounder */ {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, /* DIMGREY_CAVEFISH */ {0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73ED, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, /* Aldebaran */ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x740C, PCI_ANY_ID, 
PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + + /* CYAN_SKILLFISH */ + {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + + /* BEIGE_GOBY */ + {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0, 0, 0} }; @@ -1202,7 +1255,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0; bool supports_atomic = false; - if (!amdgpu_virtual_display && + if (amdgpu_virtual_display || amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) supports_atomic = true; @@ -1258,7 +1311,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, #endif /* Get rid of things like offb */ - ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); if (ret) return ret; @@ -1310,14 +1363,16 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); -#ifdef MODULE - if (THIS_MODULE->state != MODULE_STATE_GOING) -#endif - DRM_ERROR("Hotplug removal is not supported\n"); drm_dev_unplug(dev); amdgpu_driver_unload_kms(dev); + + /* + * Flush any in flight DMA operations from device. + * Clear the Bus Master Enable bit and then wait on the PCIe Device + * StatusTransactions Pending bit. + */ pci_disable_device(pdev); - pci_set_drvdata(pdev, NULL); + pci_wait_for_pending_transaction(pdev); } static void @@ -1438,7 +1493,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int r; - if (amdgpu_acpi_is_s0ix_supported(adev)) + if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; adev->in_s3 = true; r = amdgpu_device_suspend(drm_dev, true); @@ -1454,7 +1509,7 @@ static int amdgpu_pmops_resume(struct device *dev) int r; r = amdgpu_device_resume(drm_dev, true); - if (amdgpu_acpi_is_s0ix_supported(adev)) + if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = false; return r; } @@ -1535,6 +1590,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; + } else if (amdgpu_device_supports_boco(drm_dev)) { + /* nothing to do */ } else if (amdgpu_device_supports_baco(drm_dev)) { amdgpu_device_baco_enter(drm_dev); } @@ -1552,6 +1609,10 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!adev->runpm) return -EINVAL; + /* Avoids registers access if device is physically gone */ + if (!pci_device_is_present(adev->pdev)) + adev->no_hw_access = true; + if (amdgpu_device_supports_px(drm_dev)) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; @@ -1597,17 +1658,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) if (amdgpu_device_has_dc_support(adev)) { struct drm_crtc *crtc; - drm_modeset_lock_all(drm_dev); - drm_for_each_crtc(crtc, drm_dev) { - if (crtc->state->active) { + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state->active) ret = -EBUSY; + drm_modeset_unlock(&crtc->mutex); + if (ret < 0) break; - } } - drm_modeset_unlock_all(drm_dev); - } else { struct drm_connector 
*list_connector; struct drm_connector_list_iter iter; @@ -1688,12 +1747,15 @@ static const struct file_operations amdgpu_driver_kms_fops = { .flush = amdgpu_flush, .release = drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, - .mmap = amdgpu_mmap, + .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, #ifdef CONFIG_COMPAT .compat_ioctl = amdgpu_kms_compat_ioctl, #endif +#ifdef CONFIG_PROC_FS + .show_fdinfo = amdgpu_show_fdinfo +#endif }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) @@ -1741,17 +1803,17 @@ static const struct drm_driver amdgpu_kms_driver = { .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, - .irq_handler = amdgpu_irq_handler, .ioctls = amdgpu_ioctls_kms, .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), .dumb_create = amdgpu_mode_dumb_create, .dumb_map_offset = amdgpu_mode_dumb_mmap, .fops = &amdgpu_driver_kms_fops, + .release = &amdgpu_driver_release_kms, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_mmap = amdgpu_gem_prime_mmap, + .gem_prime_mmap = drm_gem_prime_mmap, .name = DRIVER_NAME, .desc = DRIVER_DESC, @@ -1768,6 +1830,18 @@ static struct pci_error_handlers amdgpu_pci_err_handler = { .resume = amdgpu_pci_resume, }; +extern const struct attribute_group amdgpu_vram_mgr_attr_group; +extern const struct attribute_group amdgpu_gtt_mgr_attr_group; +extern const struct attribute_group amdgpu_vbios_version_attr_group; + +static const struct attribute_group *amdgpu_sysfs_groups[] = { + &amdgpu_vram_mgr_attr_group, + &amdgpu_gtt_mgr_attr_group, + &amdgpu_vbios_version_attr_group, + NULL, +}; + + static struct pci_driver amdgpu_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, @@ -1776,6 +1850,7 @@ static struct pci_driver amdgpu_kms_pci_driver = { .shutdown = amdgpu_pci_shutdown, .driver.pm = &amdgpu_pm_ops, .err_handler = &amdgpu_pci_err_handler, + .dev_groups = amdgpu_sysfs_groups, }; static int __init amdgpu_init(void) @@ -1797,6 +1872,7 @@ static int __init amdgpu_init(void) DRM_INFO("amdgpu kernel modesetting enabled.\n"); amdgpu_register_atpx_handler(); + amdgpu_acpi_detect(); /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ amdgpu_amdkfd_init(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c new file mode 100644 index 000000000000..4d9eb0137f8c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -0,0 +1,239 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_eeprom.h" +#include "amdgpu.h" + +/* AT24CM02 and M24M02-R have a 256-byte write page size. + */ +#define EEPROM_PAGE_BITS 8 +#define EEPROM_PAGE_SIZE (1U << EEPROM_PAGE_BITS) +#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1) + +#define EEPROM_OFFSET_SIZE 2 + +/* EEPROM memory addresses are 19-bits long, which can + * be partitioned into 3, 8, 8 bits, for a total of 19. + * The upper 3 bits are sent as part of the 7-bit + * "Device Type Identifier"--an I2C concept, which for EEPROM devices + * is hard-coded as 1010b, indicating that it is an EEPROM + * device--this is the wire format, followed by the upper + * 3 bits of the 19-bit address, followed by the direction, + * followed by two bytes holding the rest of the 16-bits of + * the EEPROM memory address. The format on the wire for EEPROM + * devices is: 1010XYZD, A15:A8, A7:A0, + * Where D is the direction and sequenced out by the hardware. + * Bits XYZ are memory address bits 18, 17 and 16. + * These bits are compared to how pins 1-3 of the part are connected, + * depending on the size of the part, more on that later. + * + * Note that of this wire format, a client is in control + * of, and needs to specify only XYZ, A15:A8, A7:0, bits, + * which is exactly the EEPROM memory address, or offset, + * in order to address up to 8 EEPROM devices on the I2C bus. + * + * For instance, a 2-Mbit I2C EEPROM part, addresses all its bytes, + * using an 18-bit address, bit 17 to 0 and thus would use all but one bit of + * the 19 bits previously mentioned. The designer would then not connect + * pins 1 and 2, and pin 3 usually named "A_2" or "E2", would be connected to + * either Vcc or GND. This would allow for up to two 2-Mbit parts on + * the same bus, where one would be addressable with bit 18 as 1, and + * the other with bit 18 of the address as 0. + * + * For a 2-Mbit part, bit 18 is usually known as the "Chip Enable" or + * "Hardware Address Bit". This bit is compared to the load on pin 3 + * of the device, described above, and if there is a match, then this + * device responds to the command. This way, you can connect two + * 2-Mbit EEPROM devices on the same bus, but see one contiguous + * memory from 0 to 7FFFFh, where address 0 to 3FFFF is in the device + * whose pin 3 is connected to GND, and address 40000 to 7FFFFh is in + * the 2nd device, whose pin 3 is connected to Vcc. + * + * This addressing you encode in the 32-bit "eeprom_addr" below, + * namely the 19-bits "XYZ,A15:A0", as a single 19-bit address. For + * instance, eeprom_addr = 0x6DA01, is 110_1101_1010_0000_0001, where + * XYZ=110b, and A15:A0=DA01h. The XYZ bits become part of the device + * address, and the rest of the address bits are sent as the memory + * address bytes. + * + * That is, for an I2C EEPROM driver everything is controlled by + * the "eeprom_addr". + * + * P.S. If you need to write, lock and read the Identification Page, + * (M24M02-DR device only, which we do not use), change the "7" to + * "0xF" in the macro below, and let the client set bit 20 to 1 in + * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to + * 1 to lock it permanently. 
+ */ +#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 7)) + +static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, + u8 *eeprom_buf, u16 buf_size, bool read) +{ + u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE]; + struct i2c_msg msgs[] = { + { + .flags = 0, + .len = EEPROM_OFFSET_SIZE, + .buf = eeprom_offset_buf, + }, + { + .flags = read ? I2C_M_RD : 0, + }, + }; + const u8 *p = eeprom_buf; + int r; + u16 len; + + for (r = 0; buf_size > 0; + buf_size -= len, eeprom_addr += len, eeprom_buf += len) { + /* Set the EEPROM address we want to write to/read from. + */ + msgs[0].addr = MAKE_I2C_ADDR(eeprom_addr); + msgs[1].addr = msgs[0].addr; + msgs[0].buf[0] = (eeprom_addr >> 8) & 0xff; + msgs[0].buf[1] = eeprom_addr & 0xff; + + if (!read) { + /* Write the maximum amount of data, without + * crossing the device's page boundary, as per + * its spec. Partial page writes are allowed, + * starting at any location within the page, + * so long as the page boundary isn't crossed + * over (actually the page pointer rolls + * over). + * + * As per the AT24CM02 EEPROM spec, after + * writing into a page, the I2C driver should + * terminate the transfer, i.e. in + * "i2c_transfer()" below, with a STOP + * condition, so that the self-timed write + * cycle begins. This is implied for the + * "i2c_transfer()" abstraction. + */ + len = min(EEPROM_PAGE_SIZE - (eeprom_addr & + EEPROM_PAGE_MASK), + (u32)buf_size); + } else { + /* Reading from the EEPROM has no limitation + * on the number of bytes read from the EEPROM + * device--they are simply sequenced out. + */ + len = buf_size; + } + msgs[1].len = len; + msgs[1].buf = eeprom_buf; + + /* This constitutes a START-STOP transaction. + */ + r = i2c_transfer(i2c_adap, msgs, ARRAY_SIZE(msgs)); + if (r != ARRAY_SIZE(msgs)) + break; + + if (!read) { + /* According to EEPROM specs the length of the + * self-writing cycle, tWR (tW), is 10 ms. + * + * TODO: Use polling on ACK, aka Acknowledge + * Polling, to minimize waiting for the + * internal write cycle to complete, as it is + * usually smaller than tWR (tW). + */ + msleep(10); + } + } + + return r < 0 ? r : eeprom_buf - p; +} + +/** + * amdgpu_eeprom_xfer -- Read/write from/to an I2C EEPROM device + * @i2c_adap: pointer to the I2C adapter to use + * @eeprom_addr: EEPROM address from which to read/write + * @eeprom_buf: pointer to data buffer to read into/write from + * @buf_size: the size of @eeprom_buf + * @read: True if reading from the EEPROM, false if writing + * + * Returns the number of bytes read/written; -errno on error. + */ +static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, + u8 *eeprom_buf, u16 buf_size, bool read) +{ + const struct i2c_adapter_quirks *quirks = i2c_adap->quirks; + u16 limit; + + if (!quirks) + limit = 0; + else if (read) + limit = quirks->max_read_len; + else + limit = quirks->max_write_len; + + if (limit == 0) { + return __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, + eeprom_buf, buf_size, read); + } else if (limit <= EEPROM_OFFSET_SIZE) { + dev_err_ratelimited(&i2c_adap->dev, + "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d", + eeprom_addr, buf_size, + read ? "read" : "write", EEPROM_OFFSET_SIZE); + return -EINVAL; + } else { + u16 ps; /* Partial size */ + int res = 0, r; + + /* The "limit" includes all data bytes sent/received, + * which would include the EEPROM_OFFSET_SIZE bytes. + * Account for them here. 
+ */ + limit -= EEPROM_OFFSET_SIZE; + for ( ; buf_size > 0; + buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) { + ps = min(limit, buf_size); + + r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, + eeprom_buf, ps, read); + if (r < 0) + return r; + res += r; + } + + return res; + } +} + +int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes) +{ + return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes, + true); +} + +int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes) +{ + return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes, + false); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h new file mode 100644 index 000000000000..6935adb2be1f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h @@ -0,0 +1,37 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _AMDGPU_EEPROM_H +#define _AMDGPU_EEPROM_H + +#include <linux/i2c.h> + +int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes); + +int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 09b048647523..cd0acbea75da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -273,9 +273,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, return 0; out: - if (abo) { - - } if (fb && ret) { drm_gem_object_put(gobj); drm_framebuffer_unregister_private(fb); @@ -344,7 +341,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev) } /* disable all the possible outputs/crtcs before entering KMS mode */ - if (!amdgpu_device_has_dc_support(adev)) + if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display) drm_helper_disable_unused_functions(adev_to_drm(adev)); drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c new file mode 100644 index 000000000000..5a6857c44bb6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: MIT +/* Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: David Nieto + * Roy Sun + */ + +#include <linux/debugfs.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/reboot.h> +#include <linux/syscalls.h> + +#include <drm/amdgpu_drm.h> +#include <drm/drm_debugfs.h> + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_gem.h" +#include "amdgpu_ctx.h" +#include "amdgpu_fdinfo.h" + + +static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = { + [AMDGPU_HW_IP_GFX] = "gfx", + [AMDGPU_HW_IP_COMPUTE] = "compute", + [AMDGPU_HW_IP_DMA] = "dma", + [AMDGPU_HW_IP_UVD] = "dec", + [AMDGPU_HW_IP_VCE] = "enc", + [AMDGPU_HW_IP_UVD_ENC] = "enc_1", + [AMDGPU_HW_IP_VCN_DEC] = "dec", + [AMDGPU_HW_IP_VCN_ENC] = "enc", + [AMDGPU_HW_IP_VCN_JPEG] = "jpeg", +}; + +void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct amdgpu_fpriv *fpriv; + uint32_t bus, dev, fn, i, domain; + uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; + struct drm_file *file = f->private_data; + struct amdgpu_device *adev = drm_to_adev(file->minor->dev); + struct amdgpu_bo *root; + int ret; + + ret = amdgpu_file_to_fpriv(f, &fpriv); + if (ret) + return; + bus = adev->pdev->bus->number; + domain = pci_domain_nr(adev->pdev->bus); + dev = PCI_SLOT(adev->pdev->devfn); + fn = PCI_FUNC(adev->pdev->devfn); + + root = amdgpu_bo_ref(fpriv->vm.root.bo); + if (!root) + return; + + ret = amdgpu_bo_reserve(root, false); + if (ret) { + DRM_ERROR("Fail to reserve bo\n"); + return; + } + amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem); + amdgpu_bo_unreserve(root); + amdgpu_bo_unref(&root); + + seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus, + dev, fn, fpriv->vm.pasid); + seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL); + seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL); + seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL); + for (i = 0; i < AMDGPU_HW_IP_NUM; i++) { + uint32_t count = amdgpu_ctx_num_entities[i]; + int idx = 0; + uint64_t total = 0, min = 0; + uint32_t perc, frac; + + for (idx = 0; idx < count; idx++) { + total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr, + i, idx, &min); + if ((total == 0) || (min == 0)) + continue; + + perc = div64_u64(10000 * total, min); + frac = perc % 100; + + seq_printf(m, "%s%d:\t%d.%d%%\n", + amdgpu_ip_name[i], + idx, perc/100, frac); + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h new file mode
100644 index 000000000000..41a4c7056729 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: David Nieto + * Roy Sun + */ +#ifndef __AMDGPU_SMI_H__ +#define __AMDGPU_SMI_H__ + +#include <linux/idr.h> +#include <linux/kfifo.h> +#include <linux/rbtree.h> +#include <drm/gpu_scheduler.h> +#include <drm/drm_file.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <linux/sched/mm.h> + +#include "amdgpu_sync.h" +#include "amdgpu_ring.h" +#include "amdgpu_ids.h" + +uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id); +void amdgpu_show_fdinfo(struct seq_file *m, struct file *f); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 47ea46859618..8d682befe0d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -36,6 +36,7 @@ #include <linux/firmware.h> #include <linux/pm_runtime.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_trace.h" @@ -128,30 +129,50 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * * @ring: ring the fence is associated with * @f: resulting fence object + * @job: job the fence is embedded in * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. 
*/ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job, unsigned flags) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_fence *fence; + struct dma_fence *fence; + struct amdgpu_fence *am_fence; struct dma_fence __rcu **ptr; uint32_t seq; int r; - fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); - if (fence == NULL) - return -ENOMEM; + if (job == NULL) { + /* create a separate hw fence */ + am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); + if (am_fence == NULL) + return -ENOMEM; + fence = &am_fence->base; + am_fence->ring = ring; + } else { + /* make use of the job-embedded fence */ + fence = &job->hw_fence; + } seq = ++ring->fence_drv.sync_seq; - fence->ring = ring; - dma_fence_init(&fence->base, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, - seq); + if (job != NULL && job->job_run_counter) { + /* reinit seq for resubmitted jobs */ + fence->seqno = seq; + } else { + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, + seq); + } + + if (job != NULL) { + /* mark that this fence has a parent job */ + set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags); + } + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); @@ -174,9 +195,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, /* This function can't be called concurrently anyway, otherwise * emitting the fence would mess up the hardware ring buffer. */ - rcu_assign_pointer(*ptr, dma_fence_get(&fence->base)); + rcu_assign_pointer(*ptr, dma_fence_get(fence)); - *f = &fence->base; + *f = fence; return 0; } @@ -416,9 +437,6 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, } amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); - if (irq_src) - amdgpu_irq_get(adev, irq_src, irq_type); - ring->fence_drv.irq_src = irq_src; ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; @@ -434,6 +452,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * * @ring: ring to init the fence driver on * @num_hw_submission: number of entries on the hardware queue + * @sched_score: optional score atomic shared with other schedulers * * Init the fence driver for the requested ring (all asics). * Helper function for amdgpu_fence_driver_init(). @@ -488,7 +507,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, num_hw_submission, amdgpu_job_hang_limit, - timeout, sched_score, ring->name); + timeout, NULL, sched_score, ring->name); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", ring->name); @@ -499,7 +518,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, } /** - * amdgpu_fence_driver_init - init the fence driver + * amdgpu_fence_driver_sw_init - init the fence driver * for all possible rings. * * @adev: amdgpu device pointer @@ -510,93 +529,83 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, * amdgpu_fence_driver_start_ring(). * Returns 0 for success. */ -int amdgpu_fence_driver_init(struct amdgpu_device *adev) +int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev) { return 0; } /** - * amdgpu_fence_driver_fini - tear down the fence driver + * amdgpu_fence_driver_hw_fini - tear down the fence driver * for all possible rings.
* * @adev: amdgpu device pointer * * Tear down the fence driver for all possible rings (all asics). */ -void amdgpu_fence_driver_fini(struct amdgpu_device *adev) +void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) { - unsigned i, j; - int r; + int i, r; for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->fence_drv.initialized) continue; + if (!ring->no_scheduler) - drm_sched_fini(&ring->sched); - r = amdgpu_fence_wait_empty(ring); - if (r) { - /* no need to trigger GPU reset as we are unloading */ + drm_sched_stop(&ring->sched, NULL); + + /* You can't wait for HW to signal if it's gone */ + if (!drm_dev_is_unplugged(&adev->ddev)) + r = amdgpu_fence_wait_empty(ring); + else + r = -ENODEV; + /* no need to trigger GPU reset as we are unloading */ + if (r) amdgpu_fence_driver_force_completion(ring); - } + if (ring->fence_drv.irq_src) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); del_timer_sync(&ring->fence_drv.fallback_timer); - for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) - dma_fence_put(ring->fence_drv.fences[j]); - kfree(ring->fence_drv.fences); - ring->fence_drv.fences = NULL; - ring->fence_drv.initialized = false; } } -/** - * amdgpu_fence_driver_suspend - suspend the fence driver - * for all possible rings. - * - * @adev: amdgpu device pointer - * - * Suspend the fence driver for all possible rings (all asics). - */ -void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) +void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev) { - int i, r; + unsigned int i, j; for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; + if (!ring || !ring->fence_drv.initialized) continue; - /* wait for gpu to finish processing current batch */ - r = amdgpu_fence_wait_empty(ring); - if (r) { - /* delay GPU reset to resume */ - amdgpu_fence_driver_force_completion(ring); - } + if (!ring->no_scheduler) + drm_sched_fini(&ring->sched); - /* disable the interrupt */ - if (ring->fence_drv.irq_src) - amdgpu_irq_put(adev, ring->fence_drv.irq_src, - ring->fence_drv.irq_type); + for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) + dma_fence_put(ring->fence_drv.fences[j]); + kfree(ring->fence_drv.fences); + ring->fence_drv.fences = NULL; + ring->fence_drv.initialized = false; } } /** - * amdgpu_fence_driver_resume - resume the fence driver + * amdgpu_fence_driver_hw_init - enable the fence driver * for all possible rings. * * @adev: amdgpu device pointer * - * Resume the fence driver for all possible rings (all asics). + * Enable the fence driver for all possible rings (all asics). * Not all asics have all rings, so each asic will only * start the fence driver on the rings it has using * amdgpu_fence_driver_start_ring(). * Returns 0 for success. 
*/ -void amdgpu_fence_driver_resume(struct amdgpu_device *adev) +void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) { int i; @@ -605,6 +614,11 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; + if (!ring->no_scheduler) { + drm_sched_resubmit_jobs(&ring->sched); + drm_sched_start(&ring->sched, true); + } + /* enable the interrupt */ if (ring->fence_drv.irq_src) amdgpu_irq_get(adev, ring->fence_drv.irq_src, @@ -635,8 +649,16 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence) static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_fence *fence = to_amdgpu_fence(f); - return (const char *)fence->ring->name; + struct amdgpu_ring *ring; + + if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + + ring = to_amdgpu_ring(job->base.sched); + } else { + ring = to_amdgpu_fence(f)->ring; + } + return (const char *)ring->name; } /** @@ -649,13 +671,20 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) */ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_fence *fence = to_amdgpu_fence(f); - struct amdgpu_ring *ring = fence->ring; + struct amdgpu_ring *ring; + + if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + + ring = to_amdgpu_ring(job->base.sched); + } else { + ring = to_amdgpu_fence(f)->ring; + } if (!timer_pending(&ring->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(ring); - DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); + DMA_FENCE_TRACE(f, "armed on ring %i!\n", ring->idx); return true; } @@ -670,8 +699,20 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) static void amdgpu_fence_free(struct rcu_head *rcu) { struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - struct amdgpu_fence *fence = to_amdgpu_fence(f); - kmem_cache_free(amdgpu_fence_slab, fence); + + if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { + /* free job if fence has a parent job */ + struct amdgpu_job *job; + + job = container_of(f, struct amdgpu_job, hw_fence); + kfree(job); + } else { + /* free fence_slab if it's separated fence*/ + struct amdgpu_fence *fence; + + fence = to_amdgpu_fence(f); + kmem_cache_free(amdgpu_fence_slab, fence); + } } /** @@ -694,6 +735,7 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .release = amdgpu_fence_release, }; + /* * Fence debugfs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 39b6c6bfab45..7709caeb233d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -27,10 +27,10 @@ #include "smu_v11_0_i2c.h" #include "atom.h" #include "amdgpu_fru_eeprom.h" +#include "amdgpu_eeprom.h" -#define I2C_PRODUCT_INFO_ADDR 0xAC -#define I2C_PRODUCT_INFO_ADDR_SIZE 0x2 -#define I2C_PRODUCT_INFO_OFFSET 0xC0 +#define FRU_EEPROM_MADDR 0x60000 +#define I2C_PRODUCT_INFO_OFFSET 0xC0 static bool is_fru_eeprom_supported(struct amdgpu_device *adev) { @@ -62,19 +62,11 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) } static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, - unsigned char *buff) + unsigned char *buff) { int ret, size; - struct i2c_msg msg = { - .addr = I2C_PRODUCT_INFO_ADDR, - .flags = I2C_M_RD, - .buf 
= buff, - }; - buff[0] = 0; - buff[1] = addrptr; - msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + 1; - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); + ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr, buff, 1); if (ret < 1) { DRM_WARN("FRU: Failed to get size field"); return ret; @@ -83,13 +75,9 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, /* The size returned by the i2c requires subtraction of 0xC0 since the * size apparently always reports as 0xC0+actual size. */ - size = buff[2] - I2C_PRODUCT_INFO_OFFSET; - /* Add 1 since address field was 1 byte */ - buff[1] = addrptr + 1; - - msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + size; - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); + size = buff[0] - I2C_PRODUCT_INFO_OFFSET; + ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr + 1, buff, size); if (ret < 1) { DRM_WARN("FRU: Failed to get data field"); return ret; @@ -101,8 +89,8 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { unsigned char buff[34]; - int addrptr, size; - int len; + u32 addrptr; + int size, len; if (!is_fru_eeprom_supported(adev)) return 0; @@ -125,7 +113,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) * Bytes 8-a are all 1-byte and refer to the size of the entire struct, * and the language field, so just start from 0xb, manufacturer size */ - addrptr = 0xb; + addrptr = FRU_EEPROM_MADDR + 0xb; size = amdgpu_fru_read_eeprom(adev, addrptr, buff); if (size < 1) { DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c index 8d1ad294cb02..2ca3c329de6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -121,6 +121,9 @@ static const struct file_operations amdgpu_fw_attestation_debugfs_ops = { static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev) { + if (adev->flags & AMD_IS_APU) + return 0; + if (adev->asic_type >= CHIP_SIENNA_CICHLID) return 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index c5a9a4fb10d2..76efd5f8950f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -60,7 +60,7 @@ */ /** - * amdgpu_dummy_page_init - init dummy page used by the driver + * amdgpu_gart_dummy_page_init - init dummy page used by the driver * * @adev: amdgpu_device pointer * @@ -76,7 +76,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) if (adev->dummy_page_addr) return 0; adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); adev->dummy_page_addr = 0; @@ -86,18 +86,18 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) } /** - * amdgpu_dummy_page_fini - free dummy page used by the driver + * amdgpu_gart_dummy_page_fini - free dummy page used by the driver * * @adev: amdgpu_device pointer * * Frees the dummy page used by the driver (all asics). 
*/ -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { if (!adev->dummy_page_addr) return; - pci_unmap_page(adev->pdev, adev->dummy_page_addr, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); adev->dummy_page_addr = 0; } @@ -250,7 +250,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } } mb(); - amdgpu_asic_flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); @@ -300,7 +300,6 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, * @adev: amdgpu_device pointer * @offset: offset into the GPU's gart aperture * @pages: number of pages to bind - * @pagelist: pages to bind * @dma_addr: DMA addresses of pages * @flags: page table entry flags * @@ -309,11 +308,9 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, * Returns 0 for success, -EINVAL for failure. */ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, - int pages, struct page **pagelist, dma_addr_t *dma_addr, + int pages, dma_addr_t *dma_addr, uint64_t flags) { - int r, i; - if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); return -EINVAL; @@ -322,16 +319,26 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, if (!adev->gart.ptr) return 0; - r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, - adev->gart.ptr); - if (r) - return r; + return amdgpu_gart_map(adev, offset, pages, dma_addr, flags, + adev->gart.ptr); +} + +/** + * amdgpu_gart_invalidate_tlb - invalidate gart TLB + * + * @adev: amdgpu device driver pointer + * + * Invalidate gart TLB which can be used as a way to flush gart changes + * + */ +void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev) +{ + int i; mb(); - amdgpu_asic_flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); - return 0; } /** @@ -365,15 +372,3 @@ int amdgpu_gart_init(struct amdgpu_device *adev) return 0; } - -/** - * amdgpu_gart_fini - tear down the driver info for managing the gart - * - * @adev: amdgpu_device pointer - * - * Tear down the gart driver info and free the dummy page (all asics).
- */ -void amdgpu_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_dummy_page_fini(adev); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index a25fe97b0196..78895413cf9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -57,14 +57,13 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); -void amdgpu_gart_fini(struct amdgpu_device *adev); +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, int pages, dma_addr_t *dma_addr, uint64_t flags, void *dst); int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, - int pages, struct page **pagelist, - dma_addr_t *dma_addr, uint64_t flags); - + int pages, dma_addr_t *dma_addr, uint64_t flags); +void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 311bcdc59eda..d6aa032890ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -32,6 +32,7 @@ #include <linux/dma-buf.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_drv.h> #include <drm/drm_gem_ttm_helper.h> #include "amdgpu.h" @@ -41,6 +42,46 @@ static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; +static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) +{ + struct ttm_buffer_object *bo = vmf->vma->vm_private_data; + struct drm_device *ddev = bo->base.dev; + vm_fault_t ret; + int idx; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + if (drm_dev_enter(ddev, &idx)) { + ret = amdgpu_bo_fault_reserve_notify(bo); + if (ret) { + drm_dev_exit(idx); + goto unlock; + } + + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT, 1); + + drm_dev_exit(idx); + } else { + ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + +unlock: + dma_resv_unlock(bo->base.resv); + return ret; +} + +static const struct vm_operations_struct amdgpu_gem_vm_ops = { + .fault = amdgpu_gem_fault, + .open = ttm_bo_vm_open, + .close = ttm_bo_vm_close, + .access = ttm_bo_vm_access +}; + static void amdgpu_gem_object_free(struct drm_gem_object *gobj) { struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); @@ -129,7 +170,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, return -EPERM; if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && - abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) + abo->tbo.base.resv != vm->root.bo->tbo.base.resv) return -EPERM; r = amdgpu_bo_reserve(abo, false); @@ -185,7 +226,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, if (!amdgpu_vm_ready(vm)) goto out_unlock; - fence = dma_resv_get_excl(bo->tbo.base.resv); + fence = dma_resv_excl_fence(bo->tbo.base.resv); if (fence) { amdgpu_bo_fence(bo, fence, true); fence = NULL; @@ -205,6 +246,27 @@ out_unlock: ttm_eu_backoff_reservation(&ticket, &list); } +static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) + return -EPERM; + if 
(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) + return -EPERM; + + /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings + * for debugger access to invisible VRAM. Should have used MAP_SHARED + * instead. Clearing VM_MAYWRITE prevents the mapping from ever + * becoming writable and makes is_cow_mapping(vm_flags) false. + */ + if (is_cow_mapping(vma->vm_flags) && + !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + vma->vm_flags &= ~VM_MAYWRITE; + + return drm_gem_ttm_mmap(obj, vma); +} + static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { .free = amdgpu_gem_object_free, .open = amdgpu_gem_object_open, @@ -212,6 +274,8 @@ static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { .export = amdgpu_gem_prime_export, .vmap = drm_gem_ttm_vmap, .vunmap = drm_gem_ttm_vunmap, + .mmap = amdgpu_gem_object_mmap, + .vm_ops = &amdgpu_gem_vm_ops, }; /* @@ -265,11 +329,11 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, } if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { - r = amdgpu_bo_reserve(vm->root.base.bo, false); + r = amdgpu_bo_reserve(vm->root.bo, false); if (r) return r; - resv = vm->root.base.bo->tbo.base.resv; + resv = vm->root.bo->tbo.base.resv; } initial_domain = (u32)(0xffffffff & args->in.domains); @@ -277,30 +341,27 @@ retry: r = amdgpu_gem_object_create(adev, size, args->in.alignment, initial_domain, flags, ttm_bo_type_device, resv, &gobj); - if (r) { - if (r != -ERESTARTSYS) { - if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { - flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - goto retry; - } + if (r && r != -ERESTARTSYS) { + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { + flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + goto retry; + } - if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { - initial_domain |= AMDGPU_GEM_DOMAIN_GTT; - goto retry; - } - DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", - size, initial_domain, args->in.alignment, r); + if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { + initial_domain |= AMDGPU_GEM_DOMAIN_GTT; + goto retry; } - return r; + DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", + size, initial_domain, args->in.alignment, r); } if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { if (!r) { struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - abo->parent = amdgpu_bo_ref(vm->root.base.bo); + abo->parent = amdgpu_bo_ref(vm->root.bo); } - amdgpu_bo_unreserve(vm->root.base.bo); + amdgpu_bo_unreserve(vm->root.bo); } if (r) return r; @@ -471,8 +532,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true, - timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); /* ret == 0 means not signaled, * ret > 0 means signaled @@ -558,7 +618,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) goto error; } @@ -766,7 +826,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, void __user *out = u64_to_user_ptr(args->value); info.bo_size = robj->tbo.base.size; - info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; + info.alignment = robj->tbo.page_alignment << PAGE_SHIFT; info.domains = robj->preferred_domains; info.domain_flags = robj->flags; amdgpu_bo_unreserve(robj); @@ -775,7 +835,8 @@ int 
amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, break; } case AMDGPU_GEM_OP_SET_PLACEMENT: - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) { + if (robj->tbo.base.import_attach && + args->value & AMDGPU_GEM_DOMAIN_VRAM) { r = -EINVAL; amdgpu_bo_unreserve(robj); break; @@ -787,7 +848,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, } for (base = robj->vm_bo; base; base = base->next) if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev), - amdgpu_ttm_adev(base->vm->root.base.bo->tbo.bdev))) { + amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) { r = -EINVAL; amdgpu_bo_unreserve(robj); goto out; @@ -840,7 +901,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); - domain = amdgpu_bo_get_preferred_pin_domain(adev, + domain = amdgpu_bo_get_preferred_domain(adev, amdgpu_display_supported_domains(adev, flags)); r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, ttm_bo_type_device, NULL, &gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 95d4f43a03df..e7f06bd0f0cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_lock(&adev->gfx.gfx_off_mutex); - if (!enable) - adev->gfx.gfx_off_req_count++; - else if (adev->gfx.gfx_off_req_count > 0) + if (enable) { + /* If the count is already 0, it means there's an imbalance bug somewhere. + * Note that the bug may be in a different caller than the one which triggers the + * WARN_ON_ONCE. + */ + if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0)) + goto unlock; + adev->gfx.gfx_off_req_count--; - if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); - } else if (!enable && adev->gfx.gfx_off_state) { - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { - adev->gfx.gfx_off_state = false; + if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); + } else { + if (adev->gfx.gfx_off_req_count == 0) { + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); + + if (adev->gfx.gfx_off_state && + !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { + adev->gfx.gfx_off_state = false; - if (adev->gfx.funcs->init_spm_golden) { - dev_dbg(adev->dev, "GFXOFF is disabled, re-init SPM golden settings\n"); - amdgpu_gfx_init_spm_golden(adev); + if (adev->gfx.funcs->init_spm_golden) { + dev_dbg(adev->dev, + "GFXOFF is disabled, re-init SPM golden settings\n"); + amdgpu_gfx_init_spm_golden(adev); + } } } + + adev->gfx.gfx_off_req_count++; } +unlock: mutex_unlock(&adev->gfx.gfx_off_mutex); } @@ -607,7 +621,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) struct ras_ih_if ih_info = { .cb = amdgpu_gfx_process_ras_data_cb, }; - struct ras_query_if info = { 0 }; if (!adev->gfx.ras_if) { adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); @@ -616,7 +629,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->gfx.ras_if->sub_block_index = 0; - strcpy(adev->gfx.ras_if->name, "gfx"); } fs_info.head = ih_info.head = 
*adev->gfx.ras_if; r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, @@ -625,12 +637,8 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) goto free; if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { - if (adev->gmc.xgmi.connected_to_cpu) { - info.head = *adev->gfx.ras_if; - amdgpu_ras_query_error_status(adev, &info); - } else { + if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); - } r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index 66ebc2e3b2ad..beabab515836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -34,6 +34,7 @@ struct amdgpu_gfxhub_funcs { void (*set_fault_enable_default)(struct amdgpu_device *adev, bool value); void (*init)(struct amdgpu_device *adev); int (*get_xgmi_info)(struct amdgpu_device *adev); + void (*utcl2_harvest)(struct amdgpu_device *adev); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c39ed9eb0987..c7797eac83c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -31,6 +31,8 @@ #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" +#include <drm/drm_drv.h> + /** * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0 * @@ -99,7 +101,7 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - switch (bo->tbo.mem.mem_type) { + switch (bo->tbo.resource->mem_type) { case TTM_PL_TT: *addr = bo->tbo.ttm->dma_address[0]; break; @@ -110,7 +112,7 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, *addr = 0; break; } - *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem); + *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource); amdgpu_gmc_get_vm_pde(adev, level, addr, flags); } @@ -151,6 +153,10 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; + int idx; + + if (!drm_dev_enter(&adev->ddev, &idx)) + return 0; /* * The following is for PTE only. GART does not have PDEs. 
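The hunk above and the one that follows bracket the PTE write in amdgpu_gmc_set_pte_pde() with drm_dev_enter()/drm_dev_exit(), the DRM hot-unplug guard used throughout this series so that the BAR is no longer touched once the device has been unplugged. A minimal sketch of that idiom, assuming a hypothetical helper and register offset (neither appears in this patch):

#include <drm/drm_drv.h>
#include "amdgpu.h"

/* Hypothetical helper showing the drm_dev_enter()/drm_dev_exit() guard. */
static void example_poke_hw(struct amdgpu_device *adev, u32 value)
{
	int idx;

	/* drm_dev_enter() returns false once drm_dev_unplug() has run. */
	if (!drm_dev_enter(&adev->ddev, &idx))
		return;	/* device is gone, skip the hardware access */

	WREG32(0x1234, value);	/* placeholder register offset */

	drm_dev_exit(idx);	/* pairs with the successful enter above */
}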
@@ -158,6 +164,9 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, value = addr & 0x0000FFFFFFFFF000ULL; value |= flags; writeq(value, ptr + (gpu_page_idx * 8)); + + drm_dev_exit(idx); + return 0; } @@ -333,6 +342,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) } /** + * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid + * + * @addr: 48 bit physical address, page aligned (36 significant bits) + * @pasid: 16 bit process address space identifier + */ +static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid) +{ + return addr << 4 | pasid; +} + +/** * amdgpu_gmc_filter_faults - filter VM faults * * @adev: amdgpu device structure @@ -348,8 +368,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp) { struct amdgpu_gmc *gmc = &adev->gmc; - - uint64_t stamp, key = addr << 4 | pasid; + uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid); struct amdgpu_gmc_fault *fault; uint32_t hash; @@ -365,7 +384,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, while (fault->timestamp >= stamp) { uint64_t tmp; - if (fault->key == key) + if (atomic64_read(&fault->key) == key) return true; tmp = fault->timestamp; @@ -378,7 +397,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, /* Add the fault to the ring */ fault = &gmc->fault_ring[gmc->last_fault]; - fault->key = key; + atomic64_set(&fault->key, key); fault->timestamp = timestamp; /* And update the hash */ @@ -387,6 +406,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, return false; } +/** + * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter + * + * @adev: amdgpu device structure + * @addr: address of the VM fault + * @pasid: PASID of the process causing the fault + * + * Remove the address from the fault filter, so that future vm faults on this + * address will be passed to the retry fault handler to recover.
+ */ +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid) +{ + struct amdgpu_gmc *gmc = &adev->gmc; + uint64_t key = amdgpu_gmc_fault_key(addr, pasid); + struct amdgpu_gmc_fault *fault; + uint32_t hash; + uint64_t tmp; + + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER); + fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; + do { + if (atomic64_cmpxchg(&fault->key, key, 0) == key) + break; + + tmp = fault->timestamp; + fault = &gmc->fault_ring[fault->next]; + } while (fault->timestamp < tmp); +} + int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) { int r; @@ -415,6 +464,34 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) return r; } + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->ras_late_init) { + r = adev->hdp.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mca.mp0.ras_funcs && + adev->mca.mp0.ras_funcs->ras_late_init) { + r = adev->mca.mp0.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mca.mp1.ras_funcs && + adev->mca.mp1.ras_funcs->ras_late_init) { + r = adev->mca.mp1.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mca.mpio.ras_funcs && + adev->mca.mpio.ras_funcs->ras_late_init) { + r = adev->mca.mpio.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + return 0; } @@ -426,11 +503,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) if (adev->mmhub.ras_funcs && adev->mmhub.ras_funcs->ras_fini) - amdgpu_mmhub_ras_fini(adev); + adev->mmhub.ras_funcs->ras_fini(adev); if (adev->gmc.xgmi.ras_funcs && adev->gmc.xgmi.ras_funcs->ras_fini) adev->gmc.xgmi.ras_funcs->ras_fini(adev); + + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->ras_fini) + adev->hdp.ras_funcs->ras_fini(adev); } /* @@ -477,7 +558,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) } /** - * amdgpu_tmz_set -- check and set if a device supports TMZ + * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ * @adev: amdgpu_device pointer * * Check and set if an the device @adev supports Trusted Memory @@ -502,6 +583,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { @@ -523,7 +605,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) } /** - * amdgpu_noretry_set -- set per asic noretry defaults + * amdgpu_gmc_noretry_set -- set per asic noretry defaults * @adev: amdgpu_device pointer * * Set a per asic default for the no-retry parameter. @@ -578,13 +660,18 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + hub->ctx_distance * i; - tmp = RREG32(reg); + tmp = (hub_type == AMDGPU_GFXHUB_0) ? + RREG32_SOC15_IP(GC, reg) : + RREG32_SOC15_IP(MMHUB, reg); + if (enable) tmp |= hub->vm_cntx_cntl_vm_fault; else tmp &= ~hub->vm_cntx_cntl_vm_fault; - WREG32(reg, tmp); + (hub_type == AMDGPU_GFXHUB_0) ? 
+ WREG32_SOC15_IP(GC, reg, tmp) : + WREG32_SOC15_IP(MMHUB, reg, tmp); } } @@ -720,3 +807,22 @@ uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo { return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base; } + +void amdgpu_gmc_get_reserved_allocation(struct amdgpu_device *adev) +{ + /* Some ASICs need to reserve a region of video memory to avoid access + * from driver */ + adev->mman.stolen_reserved_offset = 0; + adev->mman.stolen_reserved_size = 0; + + switch (adev->asic_type) { + case CHIP_YELLOW_CARP: + if (amdgpu_discovery == 0) { + adev->mman.stolen_reserved_offset = 0x1ffb0000; + adev->mman.stolen_reserved_size = 64 * PAGE_SIZE; + } + break; + default: + break; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 9d11c02a3938..e55201134a01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -66,9 +66,9 @@ struct firmware; * GMC page fault information */ struct amdgpu_gmc_fault { - uint64_t timestamp; + uint64_t timestamp:48; uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; - uint64_t key:52; + atomic64_t key; }; /* @@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp); +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); @@ -330,6 +332,7 @@ amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, bool enable); void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev); +void amdgpu_gmc_get_reserved_allocation(struct amdgpu_device *adev); void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev); uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 540c01052b21..675a72ef305d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -22,17 +22,26 @@ * Authors: Christian König */ +#include <drm/ttm/ttm_range_manager.h> + #include "amdgpu.h" -static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man) +struct amdgpu_gtt_node { + struct ttm_buffer_object *tbo; + struct ttm_range_mgr_node base; +}; + +static inline struct amdgpu_gtt_mgr * +to_gtt_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_gtt_mgr, manager); } -struct amdgpu_gtt_node { - struct drm_mm_node node; - struct ttm_buffer_object *tbo; -}; +static inline struct amdgpu_gtt_node * +to_amdgpu_gtt_node(struct ttm_resource *res) +{ + return container_of(res, struct amdgpu_gtt_node, base.base); +} /** * DOC: mem_info_gtt_total @@ -43,12 +52,14 @@ struct amdgpu_gtt_node { * the GTT block, in bytes */ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); return sysfs_emit(buf, "%llu\n", man->size 
* PAGE_SIZE); } @@ -61,12 +72,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, * size of the GTT block, in bytes */ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man)); } @@ -75,90 +88,28 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO, amdgpu_mem_info_gtt_used_show, NULL); -static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func; -/** - * amdgpu_gtt_mgr_init - init GTT manager and DRM MM - * - * @adev: amdgpu_device pointer - * @gtt_size: maximum size of GTT - * - * Allocate and initialize the GTT manager. - */ -int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) -{ - struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; - struct ttm_resource_manager *man = &mgr->manager; - uint64_t start, size; - int ret; - - man->use_tt = true; - man->func = &amdgpu_gtt_mgr_func; - - ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT); - - start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; - size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; - drm_mm_init(&mgr->mm, start, size); - spin_lock_init(&mgr->lock); - atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT); - - ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); - return ret; - } - - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); - ttm_resource_manager_set_used(man, true); - return 0; -} - -/** - * amdgpu_gtt_mgr_fini - free and destroy GTT manager - * - * @adev: amdgpu_device pointer - * - * Destroy and free the GTT manager, returns -EBUSY if ranges are still - * allocated inside it. - */ -void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) -{ - struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - - ttm_resource_manager_set_used(man, false); - - ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); - if (ret) - return; - - spin_lock(&mgr->lock); - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); - - device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); - device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); +static struct attribute *amdgpu_gtt_mgr_attributes[] = { + &dev_attr_mem_info_gtt_total.attr, + &dev_attr_mem_info_gtt_used.attr, + NULL +}; - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL); -} +const struct attribute_group amdgpu_gtt_mgr_attr_group = { + .attrs = amdgpu_gtt_mgr_attributes +}; /** * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space * - * @mem: the mem object to check + * @res: the mem object to check * * Check if a mem object has already address space allocated. 
*/ -bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem) +bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res) { - return mem->mm_node != NULL; + struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); + + return drm_mm_node_allocated(&node->base.mm_nodes[0]); } /** @@ -167,61 +118,62 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem) * @man: TTM memory type manager * @tbo: TTM BO we need this range for * @place: placement flags and restrictions - * @mem: the resulting mem object + * @res: the resulting mem object * * Dummy, allocate the node but no space for it yet. */ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, struct ttm_buffer_object *tbo, const struct ttm_place *place, - struct ttm_resource *mem) + struct ttm_resource **res) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); + uint32_t num_pages = PFN_UP(tbo->base.size); struct amdgpu_gtt_node *node; int r; - spin_lock(&mgr->lock); - if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) && - atomic64_read(&mgr->available) < mem->num_pages) { - spin_unlock(&mgr->lock); + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + atomic64_add_return(num_pages, &mgr->used) > man->size) { + atomic64_sub(num_pages, &mgr->used); return -ENOSPC; } - atomic64_sub(mem->num_pages, &mgr->available); - spin_unlock(&mgr->lock); - - if (!place->lpfn) { - mem->mm_node = NULL; - mem->start = AMDGPU_BO_INVALID_OFFSET; - return 0; - } - node = kzalloc(sizeof(*node), GFP_KERNEL); + node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); if (!node) { r = -ENOMEM; goto err_out; } node->tbo = tbo; + ttm_resource_init(tbo, place, &node->base.base); - spin_lock(&mgr->lock); - r = drm_mm_insert_node_in_range(&mgr->mm, &node->node, mem->num_pages, - mem->page_alignment, 0, place->fpfn, - place->lpfn, DRM_MM_INSERT_BEST); - spin_unlock(&mgr->lock); - - if (unlikely(r)) - goto err_free; - - mem->mm_node = node; - mem->start = node->node.start; + if (place->lpfn) { + spin_lock(&mgr->lock); + r = drm_mm_insert_node_in_range(&mgr->mm, + &node->base.mm_nodes[0], + num_pages, tbo->page_alignment, + 0, place->fpfn, place->lpfn, + DRM_MM_INSERT_BEST); + spin_unlock(&mgr->lock); + if (unlikely(r)) + goto err_free; + + node->base.base.start = node->base.mm_nodes[0].start; + } else { + node->base.mm_nodes[0].start = 0; + node->base.mm_nodes[0].size = node->base.base.num_pages; + node->base.base.start = AMDGPU_BO_INVALID_OFFSET; + } + *res = &node->base.base; return 0; err_free: kfree(node); err_out: - atomic64_add(mem->num_pages, &mgr->available); + if (!(place->flags & TTM_PL_FLAG_TEMPORARY)) + atomic64_sub(num_pages, &mgr->used); return r; } @@ -230,24 +182,25 @@ err_out: * amdgpu_gtt_mgr_del - free ranges * * @man: TTM memory type manager - * @mem: TTM memory object + * @res: TTM memory object * * Free the allocated GTT again. 
*/ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { + struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); - struct amdgpu_gtt_node *node = mem->mm_node; - if (node) { - spin_lock(&mgr->lock); - drm_mm_remove_node(&node->node); - spin_unlock(&mgr->lock); - kfree(node); - } + spin_lock(&mgr->lock); + if (drm_mm_node_allocated(&node->base.mm_nodes[0])) + drm_mm_remove_node(&node->base.mm_nodes[0]); + spin_unlock(&mgr->lock); + + if (!(res->placement & TTM_PL_FLAG_TEMPORARY)) + atomic64_sub(res->num_pages, &mgr->used); - atomic64_add(mem->num_pages, &mgr->available); + kfree(node); } /** @@ -260,27 +213,37 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); - s64 result = man->size - atomic64_read(&mgr->available); - return (result > 0 ? result : 0) * PAGE_SIZE; + return atomic64_read(&mgr->used) * PAGE_SIZE; } +/** + * amdgpu_gtt_mgr_recover - re-init gart + * + * @man: TTM memory type manager + * + * Re-init the gart for each known BO in the GTT. + */ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); + struct amdgpu_device *adev; struct amdgpu_gtt_node *node; struct drm_mm_node *mm_node; int r = 0; + adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); spin_lock(&mgr->lock); drm_mm_for_each_node(mm_node, &mgr->mm) { - node = container_of(mm_node, struct amdgpu_gtt_node, node); + node = container_of(mm_node, typeof(*node), base.mm_nodes[0]); r = amdgpu_ttm_recover_gart(node->tbo); if (r) break; } spin_unlock(&mgr->lock); + amdgpu_gart_invalidate_tlb(adev); + return r; } @@ -301,9 +264,8 @@ static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man, drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); - drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n", - man->size, (u64)atomic64_read(&mgr->available), - amdgpu_gtt_mgr_usage(man) >> 20); + drm_printf(printer, "man size:%llu pages, gtt used:%llu pages\n", + man->size, atomic64_read(&mgr->used)); } static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = { @@ -311,3 +273,61 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = { .free = amdgpu_gtt_mgr_del, .debug = amdgpu_gtt_mgr_debug }; + +/** + * amdgpu_gtt_mgr_init - init GTT manager and DRM MM + * + * @adev: amdgpu_device pointer + * @gtt_size: maximum size of GTT + * + * Allocate and initialize the GTT manager. + */ +int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) +{ + struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + uint64_t start, size; + + man->use_tt = true; + man->func = &amdgpu_gtt_mgr_func; + + ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT); + + start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; + size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; + drm_mm_init(&mgr->mm, start, size); + spin_lock_init(&mgr->lock); + atomic64_set(&mgr->used, 0); + + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_gtt_mgr_fini - free and destroy GTT manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the GTT manager, returns -EBUSY if ranges are still + * allocated inside it. 
+ */ +void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + spin_lock(&mgr->lock); + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c new file mode 100644 index 000000000000..a766e1aad2b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -0,0 +1,68 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "hdp_err_count", + }; + + if (!adev->hdp.ras_if) { + adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->hdp.ras_if) + return -ENOMEM; + adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP; + adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->hdp.ras_if->sub_block_index = 0; + } + ih_info.head = fs_info.head = *adev->hdp.ras_if; + r = amdgpu_ras_late_init(adev, adev->hdp.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) { + kfree(adev->hdp.ras_if); + adev->hdp.ras_if = NULL; + } + + return r; +} + +void amdgpu_hdp_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) && + adev->hdp.ras_if) { + struct ras_common_if *ras_if = adev->hdp.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 43caf9f8cc11..7ec99d591584 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -23,18 +23,29 @@ #ifndef __AMDGPU_HDP_H__ #define __AMDGPU_HDP_H__ +struct amdgpu_hdp_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*reset_ras_error_count)(struct amdgpu_device *adev); +}; + struct amdgpu_hdp_funcs { void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); - void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*update_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); void (*init_registers)(struct amdgpu_device *adev); }; struct amdgpu_hdp { + struct ras_common_if *ras_if; const struct amdgpu_hdp_funcs *funcs; + const struct amdgpu_hdp_ras_funcs *ras_funcs; }; +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev); +void amdgpu_hdp_ras_fini(struct amdgpu_device *adev); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index bca4dddd5a15..82608df43396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -339,7 +339,7 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, void amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connector) { - u8 val; + u8 val = 0; if (!amdgpu_connector->router.ddc_valid) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a2fe2dac32c1..c076a6b9a5a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -130,7 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; - bool skip_preamble, need_ctx_switch; + bool need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; @@ -214,20 +214,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, 
if (job && ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); -#ifdef CONFIG_X86_64 - if (!(adev->flags & AMD_IS_APU)) -#endif - { - if (ring->funcs->emit_hdp_flush) - amdgpu_ring_emit_hdp_flush(ring); - else - amdgpu_asic_flush_hdp(adev, ring); - } + amdgpu_device_flush_hdp(adev, ring); if (need_ctx_switch) status |= AMDGPU_HAVE_CTX_SWITCH; - skip_preamble = ring->current_ctx == fence_ctx; if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; status |= job->preemption_status; @@ -245,14 +236,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - /* drop preamble IBs if we don't have a context switch */ - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && - skip_preamble && - !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && - !amdgpu_mcbp && - !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ - continue; - if (job && ring->funcs->emit_frame_cntl) { if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) { amdgpu_ring_emit_frame_cntl(ring, false, secure); @@ -268,10 +251,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job && ring->funcs->emit_frame_cntl) amdgpu_ring_emit_frame_cntl(ring, false, secure); -#ifdef CONFIG_X86_64 - if (!(adev->flags & AMD_IS_APU)) -#endif - amdgpu_asic_invalidate_hdp(adev, ring); + amdgpu_device_invalidate_hdp(adev, ring); if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; @@ -282,7 +262,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } - r = amdgpu_fence_emit(ring, f, fence_flags); + r = amdgpu_fence_emit(ring, f, job, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -328,7 +308,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) { if (i == AMDGPU_IB_POOL_DIRECT) - size = PAGE_SIZE * 2; + size = PAGE_SIZE * 6; else size = AMDGPU_IB_POOL_SIZE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b4971e90b98c..b7fb72bff2c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -112,7 +112,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, unsigned count; int r; - r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences); + r = dma_resv_get_fences(resv, NULL, &count, &fences); if (r) goto fallback; @@ -156,8 +156,7 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. 
*/ - dma_resv_wait_timeout_rcu(resv, true, false, - MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } @@ -183,7 +182,7 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, } /** - * amdgpu_vm_grab_idle - grab idle VMID + * amdgpu_vmid_grab_idle - grab idle VMID * * @vm: vm to allocate id for * @ring: ring we want to submit job to @@ -256,7 +255,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, } /** - * amdgpu_vm_grab_reserved - try to assign reserved VMID + * amdgpu_vmid_grab_reserved - try to assign reserved VMID * * @vm: vm to allocate id for * @ring: ring we want to submit job to @@ -325,7 +324,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, } /** - * amdgpu_vm_grab_used - try to reuse a VMID + * amdgpu_vmid_grab_used - try to reuse a VMID * * @vm: vm to allocate id for * @ring: ring we want to submit job to @@ -397,7 +396,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, } /** - * amdgpu_vm_grab_id - allocate the next free VMID + * amdgpu_vmid_grab - allocate the next free VMID * * @vm: vm to allocate id for * @ring: ring we want to submit job to diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index faaa6aa2faaf..f3d62e196901 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -115,9 +115,11 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, */ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { + + if (!ih->ring) + return; + if (ih->use_bus_addr) { - if (!ih->ring) - return; /* add 8 bytes for the rptr/wptr shadows and * add them to the end of the ring allocation. @@ -175,7 +177,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, cur_rptr += ih->ptr_mask + 1; *prev_rptr = cur_rptr; - return cur_rptr >= checkpoint_wptr; + /* check ring is empty to workaround missing wptr overflow flag */ + return cur_rptr >= checkpoint_wptr || + (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 90f50561b43a..cc2e0c9cfe0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -46,9 +46,9 @@ #include <linux/pci.h> #include <drm/drm_crtc_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_vblank.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "atom.h" @@ -183,7 +183,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) * Returns: * result of handling the IRQ, as defined by &irqreturn_t */ -irqreturn_t amdgpu_irq_handler(int irq, void *arg) +static irqreturn_t amdgpu_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; struct amdgpu_device *adev = drm_to_adev(dev); @@ -277,6 +277,21 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) return true; } +static void amdgpu_restore_msix(struct amdgpu_device *adev) +{ + u16 ctrl; + + pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl); + if (!(ctrl & PCI_MSIX_FLAGS_ENABLE)) + return; + + /* VF FLR */ + ctrl &= ~PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl); + ctrl |= PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl); +} + /** * amdgpu_irq_init - initialize interrupt 
handling * @@ -291,6 +306,7 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) int amdgpu_irq_init(struct amdgpu_device *adev) { int r = 0; + unsigned int irq; spin_lock_init(&adev->irq.lock); @@ -333,21 +349,47 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft); - adev->irq.installed = true; - /* Use vector 0 for MSI-X */ - r = drm_irq_install(adev_to_drm(adev), pci_irq_vector(adev->pdev, 0)); + /* Use vector 0 for MSI-X. */ + r = pci_irq_vector(adev->pdev, 0); + if (r < 0) + return r; + irq = r; + + /* PCI devices require shared interrupts. */ + r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name, + adev_to_drm(adev)); if (r) { - adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); return r; } + adev->irq.installed = true; + adev->irq.irq = irq; adev_to_drm(adev)->max_vblank_count = 0x00ffffff; DRM_DEBUG("amdgpu: irq initialized.\n"); return 0; } + +void amdgpu_irq_fini_hw(struct amdgpu_device *adev) +{ + if (adev->irq.installed) { + free_irq(adev->irq.irq, adev_to_drm(adev)); + adev->irq.installed = false; + if (adev->irq.msi_enabled) + pci_free_irq_vectors(adev->pdev); + + if (!amdgpu_device_has_dc_support(adev)) + flush_work(&adev->hotplug_work); + } + + amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_ih_ring_fini(adev, &adev->irq.ih1); + amdgpu_ih_ring_fini(adev, &adev->irq.ih2); +} + /** * amdgpu_irq_fini - shut down interrupt handling * @@ -357,19 +399,10 @@ int amdgpu_irq_init(struct amdgpu_device *adev) * functionality, shuts down vblank, hotplug and reset interrupt handling, * turns off interrupts from all sources (all ASICs). 
*/ -void amdgpu_irq_fini(struct amdgpu_device *adev) +void amdgpu_irq_fini_sw(struct amdgpu_device *adev) { unsigned i, j; - if (adev->irq.installed) { - drm_irq_uninstall(adev_to_drm(adev)); - adev->irq.installed = false; - if (adev->irq.msi_enabled) - pci_free_irq_vectors(adev->pdev); - if (!amdgpu_device_has_dc_support(adev)) - flush_work(&adev->hotplug_work); - } - for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; @@ -476,7 +509,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, } else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) && adev->irq.virq[src_id]) { - generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id)); + generic_handle_domain_irq(adev->irq.domain, src_id); } else if (!adev->irq.client[client_id].sources) { DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", @@ -558,6 +591,9 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { int i, j, k; + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) + amdgpu_restore_msix(adev); + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; @@ -588,7 +624,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) { - if (!adev_to_drm(adev)->irq_enabled) + if (!adev->irq.installed) return -ENOENT; if (type >= src->num_types) @@ -618,7 +654,7 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) { - if (!adev_to_drm(adev)->irq_enabled) + if (!adev->irq.installed) return -ENOENT; if (type >= src->num_types) @@ -649,7 +685,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) { - if (!adev_to_drm(adev)->irq_enabled) + if (!adev->irq.installed) return false; if (type >= src->num_types) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index cf6116648322..e9f2c11ea416 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -80,6 +80,7 @@ struct amdgpu_irq_src_funcs { struct amdgpu_irq { bool installed; + unsigned int irq; spinlock_t lock; /* interrupt sources */ struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX]; @@ -100,10 +101,10 @@ struct amdgpu_irq { }; void amdgpu_irq_disable_all(struct amdgpu_device *adev); -irqreturn_t amdgpu_irq_handler(int irq, void *arg); int amdgpu_irq_init(struct amdgpu_device *adev); -void amdgpu_irq_fini(struct amdgpu_device *adev); +void amdgpu_irq_fini_sw(struct amdgpu_device *adev); +void amdgpu_irq_fini_hw(struct amdgpu_device *adev); int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned client_id, unsigned src_id, struct amdgpu_irq_src *source); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 759b34799221..de29518673dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -25,6 +25,8 @@ #include <linux/wait.h> #include <linux/sched.h> +#include <drm/drm_drv.h> + #include "amdgpu.h" #include "amdgpu_trace.h" @@ -34,6 +36,15 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct amdgpu_job *job = to_amdgpu_job(s_job); struct amdgpu_task_info ti; struct amdgpu_device *adev = ring->adev; + int idx; + + if 
(!drm_dev_enter(&adev->ddev, &idx)) { + DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s", + __func__, s_job->sched->name); + + /* Effectively the job is aborted as the device is gone */ + return DRM_GPU_SCHED_STAT_ENODEV; + } memset(&ti, 0, sizeof(struct amdgpu_task_info)); @@ -41,7 +52,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { DRM_ERROR("ring %s timeout, but soft recovered\n", s_job->sched->name); - return DRM_GPU_SCHED_STAT_NOMINAL; + goto exit; } amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); @@ -53,13 +64,15 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_device_should_recover_gpu(ring->adev)) { amdgpu_device_gpu_recover(ring->adev, job); - return DRM_GPU_SCHED_STAT_NOMINAL; } else { drm_sched_suspend_timeout(&ring->sched); if (amdgpu_sriov_vf(adev)) adev->virt.tdr_debug = true; - return DRM_GPU_SCHED_STAT_NOMINAL; } + +exit: + drm_dev_exit(idx); + return DRM_GPU_SCHED_STAT_NOMINAL; } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -114,11 +127,16 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; + struct dma_fence *hw_fence; unsigned i; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : job->fence; + if (job->hw_fence.ops == NULL) + hw_fence = job->external_hw_fence; + else + hw_fence = &job->hw_fence; + /* use sched fence if available */ + f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -129,20 +147,27 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) drm_sched_job_cleanup(s_job); - dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - kfree(job); + + /* only put the hw fence if has embedded fence */ + if (job->hw_fence.ops != NULL) + dma_fence_put(&job->hw_fence); + else + kfree(job); } void amdgpu_job_free(struct amdgpu_job *job) { amdgpu_job_free_resources(job); - - dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - kfree(job); + + /* only put the hw fence if has embedded fence */ + if (job->hw_fence.ops != NULL) + dma_fence_put(&job->hw_fence); + else + kfree(job); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -171,11 +196,14 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, job->base.sched = &ring->sched; r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - job->fence = dma_fence_get(*fence); + /* record external_hw_fence for direct submit */ + job->external_hw_fence = dma_fence_get(*fence); if (r) return r; amdgpu_job_free(job); + dma_fence_put(*fence); + return 0; } @@ -233,10 +261,12 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); } - /* if gpu reset, hw fence will be replaced here */ - dma_fence_put(job->fence); - job->fence = dma_fence_get(fence); + if (!job->job_run_counter) + dma_fence_get(fence); + else if (finished->error < 0) + dma_fence_put(&job->hw_fence); + job->job_run_counter++; amdgpu_job_free_resources(job); fence = r ? 
ERR_PTR(r) : fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 81caac9b958a..9e65730193b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -46,7 +46,8 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct amdgpu_ib *ibs; - struct dma_fence *fence; /* the hw fence */ + struct dma_fence hw_fence; + struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; uint32_t num_ibs; @@ -62,6 +63,9 @@ struct amdgpu_job { /* user fence handling */ uint64_t uf_addr; uint64_t uf_sequence; + + /* job_run_counter >= 1 means a resubmit job */ + uint32_t job_run_counter; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 8996cb4ed57a..9342aa23ebd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -47,8 +47,6 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) { int i; - cancel_delayed_work_sync(&adev->jpeg.idle_work); - for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 39ee88d29cca..7e45640fbee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include "amdgpu.h" #include <drm/amdgpu_drm.h> +#include <drm/drm_drv.h> #include "amdgpu_uvd.h" #include "amdgpu_vce.h" #include "atom.h" @@ -91,8 +92,11 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) pm_runtime_forbid(dev->dev); } + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD)) + DRM_WARN("smart shift update failed\n"); + amdgpu_acpi_fini(adev); - amdgpu_device_fini(adev); + amdgpu_device_fini_hw(adev); } void amdgpu_register_gpu_instance(struct amdgpu_device *adev) @@ -120,6 +124,22 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev) mutex_unlock(&mgpu_info.mutex); } +static void amdgpu_get_audio_func(struct amdgpu_device *adev) +{ + struct pci_dev *p = NULL; + + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, 1); + if (p) { + pm_runtime_get_sync(&p->dev); + + pm_runtime_mark_last_busy(&p->dev); + pm_runtime_put_autosuspend(&p->dev); + + pci_dev_put(p); + } +} + /** * amdgpu_driver_load_kms - Main load function for KMS. * @@ -209,11 +229,40 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) DPM_FLAG_MAY_SKIP_RESUME); pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); + pm_runtime_allow(dev->dev); + pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); + + /* + * For runpm implemented via BACO, PMFW will handle the + * timing for BACO in and out: + * - put ASIC into BACO state only when both video and + * audio functions are in D3 state. + * - pull ASIC out of BACO state when either video or + * audio function is in D0 state. + * Also, at startup, PMFW assumes both functions are in + * D0 state. + * + * So if snd driver was loaded prior to amdgpu driver + * and audio function was put into D3 state, there will + * be no PMFW-aware D-state transition(D0->D3) on runpm + * suspend. Thus the BACO will not be correctly kicked in. + * + * Via amdgpu_get_audio_func(), the audio dev is put + * into D0 state.
Then there will be a PMFW-aware D-state + * transition(D0->D3) on runpm suspend. + */ + if (amdgpu_device_supports_baco(dev) && + !(adev->flags & AMD_IS_APU) && + (adev->asic_type >= CHIP_NAVI10)) + amdgpu_get_audio_func(adev); } + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD)) + DRM_WARN("smart shift update failed\n"); + out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ @@ -292,27 +341,27 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, switch (query_fw->index) { case TA_FW_TYPE_PSP_XGMI: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_xgmi_ucode_version; + fw_info->feature = adev->psp.xgmi.feature_version; break; case TA_FW_TYPE_PSP_RAS: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_ras_ucode_version; + fw_info->feature = adev->psp.ras.feature_version; break; case TA_FW_TYPE_PSP_HDCP: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_hdcp_ucode_version; + fw_info->feature = adev->psp.hdcp.feature_version; break; case TA_FW_TYPE_PSP_DTM: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_dtm_ucode_version; + fw_info->feature = adev->psp.dtm.feature_version; break; case TA_FW_TYPE_PSP_RAP: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_rap_ucode_version; + fw_info->feature = adev->psp.rap.feature_version; break; case TA_FW_TYPE_PSP_SECUREDISPLAY: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_securedisplay_ucode_version; + fw_info->feature = adev->psp.securedisplay.feature_version; break; default: return -EINVAL; @@ -325,12 +374,12 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = adev->sdma.instance[query_fw->index].feature_version; break; case AMDGPU_INFO_FW_SOS: - fw_info->ver = adev->psp.sos_fw_version; - fw_info->feature = adev->psp.sos_feature_version; + fw_info->ver = adev->psp.sos.fw_version; + fw_info->feature = adev->psp.sos.feature_version; break; case AMDGPU_INFO_FW_ASD: - fw_info->ver = adev->psp.asd_fw_version; - fw_info->feature = adev->psp.asd_feature_version; + fw_info->ver = adev->psp.asd.fw_version; + fw_info->feature = adev->psp.asd.feature_version; break; case AMDGPU_INFO_FW_DMCU: fw_info->ver = adev->dm.dmcu_fw_version; @@ -341,8 +390,8 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = 0; break; case AMDGPU_INFO_FW_TOC: - fw_info->ver = adev->psp.toc_fw_version; - fw_info->feature = adev->psp.toc_feature_version; + fw_info->ver = adev->psp.toc.fw_version; + fw_info->feature = adev->psp.toc.feature_version; break; default: return -EINVAL; @@ -861,6 +910,21 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) min((size_t)size, (size_t)(bios_size - bios_offset))) ? -EFAULT : 0; } + case AMDGPU_INFO_VBIOS_INFO: { + struct drm_amdgpu_info_vbios vbios_info = {}; + struct atom_context *atom_context; + + atom_context = adev->mode_info.atom_context; + memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name)); + memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn)); + vbios_info.version = atom_context->version; + memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, + sizeof(atom_context->vbios_ver_str)); + memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date)); + + return copy_to_user(out, &vbios_info, + min((size_t)size, sizeof(vbios_info))) ? 
-EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->vbios_info.type); @@ -986,7 +1050,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!ras) return -EINVAL; - ras_mask = (uint64_t)ras->supported << 32 | ras->features; + ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features; return copy_to_user(out, &ras_mask, min_t(u64, size, sizeof(ras_mask))) ? @@ -1114,10 +1178,15 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) dev_warn(adev->dev, "No more PASIDs available!"); pasid = 0; } - r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid); + + r = amdgpu_vm_init(adev, &fpriv->vm); if (r) goto error_pasid; + r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); + if (r) + goto error_vm; + fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); if (!fpriv->prt_va) { r = -ENOMEM; @@ -1145,8 +1214,10 @@ error_vm: amdgpu_vm_fini(adev, &fpriv->vm); error_pasid: - if (pasid) + if (pasid) { amdgpu_pasid_free(pasid); + amdgpu_vm_set_pasid(adev, &fpriv->vm, 0); + } kfree(fpriv); @@ -1197,7 +1268,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, } pasid = fpriv->vm.pasid; - pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); + pd = amdgpu_bo_ref(fpriv->vm.root.bo); amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); @@ -1219,6 +1290,15 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pm_runtime_put_autosuspend(dev->dev); } + +void amdgpu_driver_release_kms(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + + amdgpu_device_fini_sw(adev); + pci_set_drvdata(adev->pdev, NULL); +} + /* * VBlank related functions. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c new file mode 100644 index 000000000000..a2d3dbbf7d25 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -0,0 +1,117 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu_ras.h" +#include "amdgpu.h" +#include "amdgpu_mca.h" + +#include "umc/umc_6_7_0_offset.h" +#include "umc/umc_6_7_0_sh_mask.h" + +void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count) +{ + uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4); + + if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count) +{ + uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4); + + if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +void amdgpu_mca_reset_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr) +{ + WREG64_PCIE(mc_status_addr * 4, 0x0ULL); +} + +void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count)); + amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count)); + + amdgpu_mca_reset_error_count(adev, mc_status_addr); +} + +int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = mca_dev->ras_funcs->sysfs_name, + }; + + if (!mca_dev->ras_if) { + mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!mca_dev->ras_if) + return -ENOMEM; + mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block; + mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + mca_dev->ras_if->sub_block_index = 0; + } + ih_info.head = fs_info.head = *mca_dev->ras_if; + r = amdgpu_ras_late_init(adev, mca_dev->ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, mca_dev->ras_if->block)) { + kfree(mca_dev->ras_if); + mca_dev->ras_if = NULL; + } + + return r; +} + +void amdgpu_mca_ras_fini(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev) +{ + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + if (!mca_dev->ras_if) + return; + + amdgpu_ras_late_fini(adev, mca_dev->ras_if, &ih_info); + kfree(mca_dev->ras_if); + mca_dev->ras_if = NULL; +}
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h new file mode 100644 index 000000000000..f860f2f0e296 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_MCA_H__ +#define __AMDGPU_MCA_H__ + +struct amdgpu_mca_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); + uint32_t ras_block; + const char* sysfs_name; +}; + +struct amdgpu_mca_ras { + struct ras_common_if *ras_if; + const struct amdgpu_mca_ras_funcs *ras_funcs; +}; + +struct amdgpu_mca_funcs { + void (*init)(struct amdgpu_device *adev); +}; + +struct amdgpu_mca { + const struct amdgpu_mca_funcs *funcs; + struct amdgpu_mca_ras mp0; + struct amdgpu_mca_ras mp1; + struct amdgpu_mca_ras mpio; +}; + +void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count); + +void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count); + +void amdgpu_mca_reset_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr); + +void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + void *ras_error_status); + +int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev); + +void amdgpu_mca_ras_fini(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c index ead3dc572ec5..24297dc51434 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -41,7 +41,6 @@ int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->mmhub.ras_if->sub_block_index = 0; - strcpy(adev->mmhub.ras_if->name, "mmhub"); } ih_info.head = fs_info.head = *adev->mmhub.ras_if; r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 
index 11aa29933c1f..b27fcbccce2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs { void *ras_error_status); void (*query_ras_error_status)(struct amdgpu_device *adev); void (*reset_ras_error_count)(struct amdgpu_device *adev); + void (*reset_ras_error_status)(struct amdgpu_device *adev); }; struct amdgpu_mmhub_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 828b5167ff12..4b153daf283d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); @@ -155,3 +155,90 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mmu_interval_notifier_remove(&bo->notifier); bo->notifier.mm = NULL; } + +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked, void *owner) +{ + struct hmm_range *hmm_range; + unsigned long timeout; + unsigned long i; + unsigned long *pfns; + int r = 0; + + hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL); + if (unlikely(!hmm_range)) + return -ENOMEM; + + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_range; + } + + hmm_range->notifier = notifier; + hmm_range->default_flags = HMM_PFN_REQ_FAULT; + if (!readonly) + hmm_range->default_flags |= HMM_PFN_REQ_WRITE; + hmm_range->hmm_pfns = pfns; + hmm_range->start = start; + hmm_range->end = start + npages * PAGE_SIZE; + hmm_range->dev_private_owner = owner; + + /* Assuming 512MB takes maximum 1 second to fault page address */ + timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; + timeout = jiffies + msecs_to_jiffies(timeout); + +retry: + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); + + if (likely(!mmap_locked)) + mmap_read_lock(mm); + + r = hmm_range_fault(hmm_range); + + if (likely(!mmap_locked)) + mmap_read_unlock(mm); + if (unlikely(r)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if (r == -EBUSY && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } + + /* + * Due to default_flags, all pages are HMM_PFN_VALID or + * hmm_range_fault() fails. FIXME: The pages cannot be touched outside + * the notifier_lock, and mmu_interval_read_retry() must be done first.
+ */ + for (i = 0; pages && i < npages; i++) + pages[i] = hmm_pfn_to_page(pfns[i]); + + *phmm_range = hmm_range; + + return 0; + +out_free_pfns: + kvfree(pfns); +out_free_range: + kfree(hmm_range); + + return r; +} + +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) +{ + int r; + + r = mmu_interval_read_retry(hmm_range->notifier, + hmm_range->notifier_seq); + kvfree(hmm_range->hmm_pfns); + kfree(hmm_range); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index a292238f75eb..14a3c1864085 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,6 +30,13 @@ #include <linux/workqueue.h> #include <linux/interval_tree.h> +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked, void *owner); +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); + #if defined(CONFIG_HMM_MIRROR) int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index cb0b581bbce7..89fb372ed49c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -344,7 +344,7 @@ struct amdgpu_mode_info { /* pointer to fbdev info structure */ struct amdgpu_fbdev *rfbdev; /* firmware flags */ - u16 firmware_flags; + u32 firmware_flags; /* pointer to backlight encoder */ struct amdgpu_encoder *bl_encoder; u8 bl_level; /* saved backlight level */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 6201a5f4b4fa..6afb02fef8cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -39,7 +39,6 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev) adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->nbio.ras_if->sub_block_index = 0; - strcpy(adev->nbio.ras_if->name, "pcie_bif"); } ih_info.head = fs_info.head = *adev->nbio.ras_if; r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 25ee53545837..843052205bd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -93,6 +93,9 @@ struct amdgpu_nbio_funcs { void (*enable_aspm)(struct amdgpu_device *adev, bool enable); void (*program_aspm)(struct amdgpu_device *adev); + void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev); + void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); + void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1345f7eba011..01a78c786536 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -52,55 +52,44 @@ * */ -/** - * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting - * - * @bo: &amdgpu_bo buffer object - * - * This function is called when a BO stops being pinned, and updates the - * &amdgpu_device pin_size values accordingly. 
- */ -static void amdgpu_bo_subtract_pin_size(struct amdgpu_bo *bo) +static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); - if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { - atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size); - atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo), - &adev->visible_pin_size); - } else if (bo->tbo.mem.mem_type == TTM_PL_TT) { - atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); - } + amdgpu_bo_kunmap(bo); + + if (bo->tbo.base.import_attach) + drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); + drm_gem_object_release(&bo->tbo.base); + amdgpu_bo_unref(&bo->parent); + kvfree(bo); } -static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) +static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); struct amdgpu_bo_user *ubo; - if (bo->tbo.pin_count > 0) - amdgpu_bo_subtract_pin_size(bo); + ubo = to_amdgpu_bo_user(bo); + kfree(ubo->metadata); + amdgpu_bo_destroy(tbo); +} - amdgpu_bo_kunmap(bo); +static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + struct amdgpu_bo_vm *vmbo; - if (bo->tbo.base.import_attach) - drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); - drm_gem_object_release(&bo->tbo.base); + vmbo = to_amdgpu_bo_vm(bo); /* in case amdgpu_device_recover_vram got NULL of bo->parent */ - if (!list_empty(&bo->shadow_list)) { + if (!list_empty(&vmbo->shadow_list)) { mutex_lock(&adev->shadow_list_lock); - list_del_init(&bo->shadow_list); + list_del_init(&vmbo->shadow_list); mutex_unlock(&adev->shadow_list_lock); } - amdgpu_bo_unref(&bo->parent); - if (bo->tbo.type == ttm_bo_type_device) { - ubo = to_amdgpu_bo_user(bo); - kfree(ubo->metadata); - } - - kfree(bo); + amdgpu_bo_destroy(tbo); } /** @@ -115,8 +104,11 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) */ bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { - if (bo->destroy == &amdgpu_bo_destroy) + if (bo->destroy == &amdgpu_bo_destroy || + bo->destroy == &amdgpu_bo_user_destroy || + bo->destroy == &amdgpu_bo_vm_destroy) return true; + return false; } @@ -157,7 +149,9 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; - places[c].mem_type = TTM_PL_TT; + places[c].mem_type = + abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ? 
+ AMDGPU_PL_PREEMPT : TTM_PL_TT; places[c].flags = 0; c++; } @@ -202,7 +196,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } - BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS); + BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS); placement->num_placement = c; placement->placement = places; @@ -386,14 +380,14 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, if (cpu_addr) amdgpu_bo_kunmap(*bo_ptr); - ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem); + ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource); for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) { (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT; (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; } r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement, - &(*bo_ptr)->tbo.mem, &ctx); + &(*bo_ptr)->tbo.resource, &ctx); if (r) goto error; @@ -515,7 +509,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags) #endif } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, +/** + * amdgpu_bo_create - create an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @bp: parameters to be used for the buffer object + * @bo_ptr: pointer to the buffer object pointer + * + * Creates an &amdgpu_bo buffer object. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_bo_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { @@ -552,11 +557,10 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo)); *bo_ptr = NULL; - bo = kzalloc(bp->bo_ptr_size, GFP_KERNEL); + bo = kvzalloc(bp->bo_ptr_size, GFP_KERNEL); if (bo == NULL) return -ENOMEM; drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size); - INIT_LIST_HEAD(&bo->shadow_list); bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? 
bp->preferred_domain : bp->domain; @@ -579,22 +583,25 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, if (bp->type == ttm_bo_type_kernel) bo->tbo.priority = 1; + if (!bp->destroy) + bp->destroy = &amdgpu_bo_destroy; + r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type, &bo->placement, page_align, &ctx, NULL, - bp->resv, &amdgpu_bo_destroy); + bp->resv, bp->destroy); if (unlikely(r != 0)) return r; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) + bo->tbo.resource->mem_type == TTM_PL_VRAM && + bo->tbo.resource->start < adev->gmc.visible_vram_size >> PAGE_SHIFT) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && - bo->tbo.mem.mem_type == TTM_PL_VRAM) { + bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); @@ -625,108 +632,68 @@ fail_unreserve: return r; } -static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, - unsigned long size, - struct amdgpu_bo *bo) -{ - struct amdgpu_bo_param bp; - int r; - - if (bo->shadow) - return 0; - - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); - if (!r) { - bo->shadow->parent = amdgpu_bo_ref(bo); - mutex_lock(&adev->shadow_list_lock); - list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list); - mutex_unlock(&adev->shadow_list_lock); - } - - return r; -} - /** - * amdgpu_bo_create - create an &amdgpu_bo buffer object + * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object * @adev: amdgpu device object * @bp: parameters to be used for the buffer object - * @bo_ptr: pointer to the buffer object pointer + * @ubo_ptr: pointer to the buffer object pointer * - * Creates an &amdgpu_bo buffer object; and if requested, also creates a - * shadow object. - * Shadow object is used to backup the original buffer object, and is always - * in GTT. + * Create a BO to be used by user application; * * Returns: * 0 for success or a negative error code on failure. 
*/ -int amdgpu_bo_create(struct amdgpu_device *adev, - struct amdgpu_bo_param *bp, - struct amdgpu_bo **bo_ptr) + +int amdgpu_bo_create_user(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, + struct amdgpu_bo_user **ubo_ptr) { - u64 flags = bp->flags; + struct amdgpu_bo *bo_ptr; int r; - bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; - - r = amdgpu_bo_do_create(adev, bp, bo_ptr); + bp->bo_ptr_size = sizeof(struct amdgpu_bo_user); + bp->destroy = &amdgpu_bo_user_destroy; + r = amdgpu_bo_create(adev, bp, &bo_ptr); if (r) return r; - if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) { - if (!bp->resv) - WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv, - NULL)); - - r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr); - - if (!bp->resv) - dma_resv_unlock((*bo_ptr)->tbo.base.resv); - - if (r) - amdgpu_bo_unref(bo_ptr); - } - + *ubo_ptr = to_amdgpu_bo_user(bo_ptr); return r; } /** - * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object + * amdgpu_bo_create_vm - create an &amdgpu_bo_vm buffer object * @adev: amdgpu device object * @bp: parameters to be used for the buffer object - * @ubo_ptr: pointer to the buffer object pointer + * @vmbo_ptr: pointer to the buffer object pointer * - * Create a BO to be used by user application; + * Create a BO to be for GPUVM. * * Returns: * 0 for success or a negative error code on failure. */ -int amdgpu_bo_create_user(struct amdgpu_device *adev, - struct amdgpu_bo_param *bp, - struct amdgpu_bo_user **ubo_ptr) +int amdgpu_bo_create_vm(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, + struct amdgpu_bo_vm **vmbo_ptr) { struct amdgpu_bo *bo_ptr; int r; - bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; - bp->bo_ptr_size = sizeof(struct amdgpu_bo_user); - r = amdgpu_bo_do_create(adev, bp, &bo_ptr); + /* bo_ptr_size will be determined by the caller and it depends on + * num of amdgpu_vm_pt entries. + */ + BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm)); + bp->destroy = &amdgpu_bo_vm_destroy; + r = amdgpu_bo_create(adev, bp, &bo_ptr); if (r) return r; - *ubo_ptr = to_amdgpu_bo_user(bo_ptr); + *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr); + INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list); return r; } + /** * amdgpu_bo_validate - validate an &amdgpu_bo buffer object * @bo: pointer to the buffer object @@ -762,6 +729,22 @@ retry: } /** + * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list + * + * @vmbo: BO that will be inserted into the shadow list + * + * Insert a BO to the shadow list. 
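The creation paths are now split: amdgpu_bo_create() is the generic entry point, while amdgpu_bo_create_user() and amdgpu_bo_create_vm() pick the per-type destroy callback and structure size themselves. A minimal caller sketch follows; the example_alloc_user_bo() helper is hypothetical and only sets parameters visible in this patch.

/* Illustrative sketch only: allocate a small GTT BO for a userspace client.
 * amdgpu_bo_create_user() fills bp.bo_ptr_size and bp.destroy itself, so the
 * caller only describes size, placement and flags.
 */
static int example_alloc_user_bo(struct amdgpu_device *adev,
                                 struct amdgpu_bo_user **ubo)
{
        struct amdgpu_bo_param bp;

        memset(&bp, 0, sizeof(bp));
        bp.size = 64 << 10;                       /* 64 KiB */
        bp.domain = AMDGPU_GEM_DOMAIN_GTT;
        bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        bp.type = ttm_bo_type_device;
        bp.resv = NULL;                           /* BO gets its own reservation object */

        return amdgpu_bo_create_user(adev, &bp, ubo);
}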
+ */ +void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev); + + mutex_lock(&adev->shadow_list_lock); + list_add_tail(&vmbo->shadow_list, &adev->shadow_list); + mutex_unlock(&adev->shadow_list_lock); +} + +/** * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow * * @shadow: &amdgpu_bo shadow to be restored @@ -815,12 +798,12 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, + MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.mem.num_pages, &bo->kmap); + r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); if (r) return r; @@ -930,21 +913,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; /* A shared bo cannot be migrated to VRAM */ - if (bo->prime_shared_count || bo->tbo.base.import_attach) { + if (bo->tbo.base.import_attach) { if (domain & AMDGPU_GEM_DOMAIN_GTT) domain = AMDGPU_GEM_DOMAIN_GTT; else return -EINVAL; } - /* This assumes only APU display buffers are pinned with (VRAM|GTT). - * See function amdgpu_display_supported_domains() - */ - domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); - if (bo->tbo.pin_count) { - uint32_t mem_type = bo->tbo.mem.mem_type; - uint32_t mem_flags = bo->tbo.mem.placement; + uint32_t mem_type = bo->tbo.resource->mem_type; + uint32_t mem_flags = bo->tbo.resource->placement; if (!(domain & amdgpu_mem_type_to_domain(mem_type))) return -EINVAL; @@ -966,6 +944,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return 0; } + /* This assumes only APU display buffers are pinned with (VRAM|GTT). 
+ * See function amdgpu_display_supported_domains() + */ + domain = amdgpu_bo_get_preferred_domain(adev, domain); + if (bo->tbo.base.import_attach) dma_buf_pin(bo->tbo.base.import_attach); @@ -994,7 +977,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, ttm_bo_pin(&bo->tbo); - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); if (domain == AMDGPU_GEM_DOMAIN_VRAM) { atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size); atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo), @@ -1037,14 +1020,22 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) */ void amdgpu_bo_unpin(struct amdgpu_bo *bo) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + ttm_bo_unpin(&bo->tbo); if (bo->tbo.pin_count) return; - amdgpu_bo_subtract_pin_size(bo); - if (bo->tbo.base.import_attach) dma_buf_unpin(bo->tbo.base.import_attach); + + if (bo->tbo.resource->mem_type == TTM_PL_VRAM) { + atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size); + atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo), + &adev->visible_pin_size); + } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { + atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); + } } /** @@ -1123,10 +1114,6 @@ int amdgpu_bo_init(struct amdgpu_device *adev) void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); - if (!adev->gmc.xgmi.connected_to_cpu) { - arch_phys_wc_del(adev->gmc.vram_mtrr); - arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); - } } /** @@ -1246,6 +1233,9 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, BUG_ON(bo->tbo.type == ttm_bo_type_kernel); ubo = to_amdgpu_bo_user(bo); + if (metadata_size) + *metadata_size = ubo->metadata_size; + if (buffer) { if (buffer_size < ubo->metadata_size) return -EINVAL; @@ -1254,8 +1244,6 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, memcpy(buffer, ubo->metadata, ubo->metadata_size); } - if (metadata_size) - *metadata_size = ubo->metadata_size; if (flags) *flags = ubo->metadata_flags; @@ -1278,7 +1266,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - struct ttm_resource *old_mem = &bo->mem; + struct ttm_resource *old_mem = bo->resource; if (!amdgpu_bo_is_amdgpu_bo(bo)) return; @@ -1289,7 +1277,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, amdgpu_bo_kunmap(abo); if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && - bo->mem.mem_type != TTM_PL_SYSTEM) + bo->resource->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); /* remember the eviction */ @@ -1304,6 +1292,26 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } +void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, + uint64_t *gtt_mem, uint64_t *cpu_mem) +{ + unsigned int domain; + + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); + switch (domain) { + case AMDGPU_GEM_DOMAIN_VRAM: + *vram_mem += amdgpu_bo_size(bo); + break; + case AMDGPU_GEM_DOMAIN_GTT: + *gtt_mem += amdgpu_bo_size(bo); + break; + case AMDGPU_GEM_DOMAIN_CPU: + default: + *cpu_mem += amdgpu_bo_size(bo); + break; + } +} + /** * amdgpu_bo_release_notify - notification about a BO being released * @bo: pointer to a buffer object @@ -1331,7 +1339,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (bo->base.resv == &bo->base._resv) 
amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); - if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node || + if (bo->resource->mem_type != TTM_PL_VRAM || !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) return; @@ -1362,18 +1370,17 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); - unsigned long offset, size; + unsigned long offset; int r; /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - if (bo->mem.mem_type != TTM_PL_VRAM) + if (bo->resource->mem_type != TTM_PL_VRAM) return 0; - size = bo->mem.num_pages << PAGE_SHIFT; - offset = bo->mem.start << PAGE_SHIFT; - if ((offset + size) <= adev->gmc.visible_vram_size) + offset = bo->resource->start << PAGE_SHIFT; + if ((offset + bo->base.size) <= adev->gmc.visible_vram_size) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1395,10 +1402,10 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) else if (unlikely(r)) return VM_FAULT_SIGBUS; - offset = bo->mem.start << PAGE_SHIFT; + offset = bo->resource->start << PAGE_SHIFT; /* this should never happen */ - if (bo->mem.mem_type == TTM_PL_VRAM && - (offset + size) > adev->gmc.visible_vram_size) + if (bo->resource->mem_type == TTM_PL_VRAM && + (offset + bo->base.size) > adev->gmc.visible_vram_size) return VM_FAULT_SIGBUS; ttm_bo_move_to_lru_tail_unlocked(bo); @@ -1482,11 +1489,11 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) */ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { - WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); + WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_SYSTEM); WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) && !bo->tbo.pin_count && bo->tbo.type != ttm_bo_type_kernel); - WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); - WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && + WARN_ON_ONCE(bo->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET); + WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_VRAM && !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); return amdgpu_bo_gpu_offset_no_check(bo); @@ -1504,21 +1511,21 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); uint64_t offset; - offset = (bo->tbo.mem.start << PAGE_SHIFT) + - amdgpu_ttm_domain_start(adev, bo->tbo.mem.mem_type); + offset = (bo->tbo.resource->start << PAGE_SHIFT) + + amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); return amdgpu_gmc_sign_extend(offset); } /** - * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout + * amdgpu_bo_get_preferred_domain - get preferred domain * @adev: amdgpu device object * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>` * * Returns: - * Which of the allowed domains is preferred for pinning the BO for scanout. + * Which of the allowed domains is preferred for allocating the BO. 
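amdgpu_bo_get_memory() gives callers a single helper for bucketing a BO's size by its current placement. Below is a hedged sketch of how an accounting path might sum a set of BOs; example_account_bos(), its calling context and its locking are assumptions, not part of the patch.

/* Illustrative sketch: accumulate per-domain usage for a set of BOs.
 * Each call adds the BO's size to the bucket matching its current
 * placement (VRAM, GTT, or CPU/other); reservation of the BOs is
 * assumed to be handled by the caller.
 */
static void example_account_bos(struct amdgpu_bo **bos, unsigned int count,
                                struct seq_file *m)
{
        uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
        unsigned int i;

        for (i = 0; i < count; i++)
                amdgpu_bo_get_memory(bos[i], &vram_mem, &gtt_mem, &cpu_mem);

        seq_printf(m, "vram %llu KiB, gtt %llu KiB, cpu %llu KiB\n",
                   vram_mem >> 10, gtt_mem >> 10, cpu_mem >> 10);
}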
*/ -uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, +uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain) { if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { @@ -1558,7 +1565,7 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) unsigned int pin_count; u64 size; - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: placement = "VRAM"; @@ -1592,7 +1599,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS); amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC); amdgpu_bo_print_flag(m, bo, VRAM_CLEARED); - amdgpu_bo_print_flag(m, bo, SHADOW); amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 2d1fefbe1e99..9d6c001c15f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -30,6 +30,8 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#include "amdgpu_res_cursor.h" + #ifdef CONFIG_MMU_NOTIFIER #include <linux/mmu_notifier.h> #endif @@ -37,7 +39,12 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63) +#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62) + #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo) +#define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo) struct amdgpu_bo_param { unsigned long size; @@ -48,7 +55,8 @@ struct amdgpu_bo_param { u64 flags; enum ttm_bo_type type; bool no_wait_gpu; - struct dma_resv *resv; + struct dma_resv *resv; + void (*destroy)(struct ttm_buffer_object *bo); }; /* bo virtual addresses in a vm */ @@ -92,21 +100,14 @@ struct amdgpu_bo { struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; u64 flags; - unsigned prime_shared_count; /* per VM structure for page tables and with virtual addresses */ struct amdgpu_vm_bo_base *vm_bo; /* Constant after initialization */ struct amdgpu_bo *parent; - struct amdgpu_bo *shadow; - - #ifdef CONFIG_MMU_NOTIFIER struct mmu_interval_notifier notifier; #endif - - struct list_head shadow_list; - struct kgd_mem *kfd_bo; }; @@ -119,6 +120,13 @@ struct amdgpu_bo_user { }; +struct amdgpu_bo_vm { + struct amdgpu_bo bo; + struct amdgpu_bo *shadow; + struct list_head shadow_list; + struct amdgpu_vm_bo_base entries[]; +}; + static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) { return container_of(tbo, struct amdgpu_bo, tbo); @@ -191,7 +199,7 @@ static inline unsigned amdgpu_bo_ngpu_pages(struct amdgpu_bo *bo) static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo) { - return (bo->tbo.mem.page_alignment << PAGE_SHIFT) / AMDGPU_GPU_PAGE_SIZE; + return (bo->tbo.page_alignment << PAGE_SHIFT) / AMDGPU_GPU_PAGE_SIZE; } /** @@ -211,18 +219,19 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; - struct drm_mm_node *node = bo->tbo.mem.mm_node; - unsigned long pages_left; + struct amdgpu_res_cursor cursor; - if 
(bo->tbo.mem.mem_type != TTM_PL_VRAM) + if (bo->tbo.resource->mem_type != TTM_PL_VRAM) return false; - for (pages_left = bo->tbo.mem.num_pages; pages_left; - pages_left -= node->size, node++) - if (node->start < fpfn) + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); + while (cursor.remaining) { + if (cursor.start < adev->gmc.visible_vram_size) return true; + amdgpu_res_next(&cursor, cursor.size); + } + return false; } @@ -245,6 +254,22 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; } +/** + * amdgpu_bo_shadowed - check if the BO is shadowed + * + * @bo: BO to be tested. + * + * Returns: + * NULL if not shadowed or else return a BO pointer. + */ +static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo) +{ + if (bo->tbo.type == ttm_bo_type_kernel) + return to_amdgpu_bo_vm(bo)->shadow; + + return NULL; +} + bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); @@ -265,6 +290,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, int amdgpu_bo_create_user(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo_user **ubo_ptr); +int amdgpu_bo_create_vm(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, + struct amdgpu_bo_vm **ubo_ptr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); @@ -300,9 +328,12 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); int amdgpu_bo_validate(struct amdgpu_bo *bo); +void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, + uint64_t *gtt_mem, uint64_t *cpu_mem); +void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence); -uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, +uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index f2e20666c9c1..4eaec446b49d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den, * Calculate feedback and reference divider for a given post divider. Makes * sure we stay within the limits. 
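The CPU-visible VRAM test above now walks the backing resource with the amdgpu_res_cursor helpers instead of dereferencing drm_mm nodes directly. The same iteration pattern can be reused elsewhere; here is a minimal sketch (hypothetical example_visible_bytes(), assuming only the cursor fields shown in the hunk) that counts visible bytes instead of returning at the first hit.

/* Illustrative sketch: walk every contiguous chunk backing a VRAM BO and
 * count how many bytes fall inside the CPU-visible window.
 */
static u64 example_visible_bytes(struct amdgpu_bo *bo)
{
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct amdgpu_res_cursor cursor;
        u64 visible = 0;

        if (bo->tbo.resource->mem_type != TTM_PL_VRAM)
                return 0;

        amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
        while (cursor.remaining) {
                if (cursor.start < adev->gmc.visible_vram_size)
                        visible += min(cursor.size,
                                       adev->gmc.visible_vram_size - cursor.start);
                amdgpu_res_next(&cursor, cursor.size);
        }

        return visible;
}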
*/ -static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div, - unsigned fb_div_max, unsigned ref_div_max, - unsigned *fb_div, unsigned *ref_div) +static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int nom, + unsigned int den, unsigned int post_div, + unsigned int fb_div_max, unsigned int ref_div_max, + unsigned int *fb_div, unsigned int *ref_div) { + /* limit reference * post divider to a maximum */ - ref_div_max = min(128 / post_div, ref_div_max); + if (adev->family == AMDGPU_FAMILY_SI) + ref_div_max = min(100 / post_div, ref_div_max); + else + ref_div_max = min(128 / post_div, ref_div_max); /* get matching reference and feedback divider */ *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max); @@ -112,7 +117,8 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_ * Try to calculate the PLL parameters to generate the given frequency: * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div) */ -void amdgpu_pll_compute(struct amdgpu_pll *pll, +void amdgpu_pll_compute(struct amdgpu_device *adev, + struct amdgpu_pll *pll, u32 freq, u32 *dot_clock_p, u32 *fb_div_p, @@ -199,7 +205,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll, for (post_div = post_div_min; post_div <= post_div_max; ++post_div) { unsigned diff; - amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max, + amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max, ref_div_max, &fb_div, &ref_div); diff = abs(target_clock - (pll->reference_freq * fb_div) / (ref_div * post_div)); @@ -214,7 +220,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll, post_div = post_div_best; /* get the feedback and reference divider for the optimal value */ - amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max, ref_div_max, + amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max, ref_div_max, &fb_div, &ref_div); /* reduce the numbers to a simpler ratio once more */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h index db6136f68b82..44a583d6c9b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h @@ -24,7 +24,8 @@ #ifndef __AMDGPU_PLL_H__ #define __AMDGPU_PLL_H__ -void amdgpu_pll_compute(struct amdgpu_pll *pll, +void amdgpu_pll_compute(struct amdgpu_device *adev, + struct amdgpu_pll *pll, u32 freq, u32 *dot_clock_p, u32 *fb_div_p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c new file mode 100644 index 000000000000..d02c8637f909 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2016-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König, Felix Kuehling + */ + +#include "amdgpu.h" + +static inline struct amdgpu_preempt_mgr * +to_preempt_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct amdgpu_preempt_mgr, manager); +} + +/** + * DOC: mem_info_preempt_used + * + * The amdgpu driver provides a sysfs API for reporting current total amount of + * used preemptible memory. + * The file mem_info_preempt_used is used for this, and returns the current + * used size of the preemptible block, in bytes + */ +static ssize_t mem_info_preempt_used_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct ttm_resource_manager *man; + + man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT); + return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man)); +} + +static DEVICE_ATTR_RO(mem_info_preempt_used); + +/** + * amdgpu_preempt_mgr_new - allocate a new node + * + * @man: TTM memory type manager + * @tbo: TTM BO we need this range for + * @place: placement flags and restrictions + * @mem: the resulting mem object + * + * Dummy, just count the space used without allocating resources or any limit. + */ +static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + + *res = kzalloc(sizeof(**res), GFP_KERNEL); + if (!*res) + return -ENOMEM; + + ttm_resource_init(tbo, place, *res); + (*res)->start = AMDGPU_BO_INVALID_OFFSET; + + atomic64_add((*res)->num_pages, &mgr->used); + return 0; +} + +/** + * amdgpu_preempt_mgr_del - free ranges + * + * @man: TTM memory type manager + * @mem: TTM memory object + * + * Free the allocated GTT again. + */ +static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + + atomic64_sub(res->num_pages, &mgr->used); + kfree(res); +} + +/** + * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain + * + * @man: TTM memory type manager + * + * Return how many bytes are used in the GTT domain + */ +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + s64 result = atomic64_read(&mgr->used); + + return (result > 0 ? result : 0) * PAGE_SIZE; +} + +/** + * amdgpu_preempt_mgr_debug - dump VRAM table + * + * @man: TTM memory type manager + * @printer: DRM printer to use + * + * Dump the table content using printk. 
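Together with the AMDGPU_GEM_CREATE_PREEMPTIBLE placement change earlier in this diff, the manager above only accounts space: such allocations get no fixed GART offset and can always be preempted. A rough creation sketch follows; example_alloc_preemptible() is illustrative only and sets just the fields this patch shows.

/* Illustrative sketch: a GTT allocation created with
 * AMDGPU_GEM_CREATE_PREEMPTIBLE is placed in AMDGPU_PL_PREEMPT, so it only
 * bumps the usage counter of the manager above (visible through the
 * mem_info_preempt_used sysfs file) rather than taking a GART range.
 */
static int example_alloc_preemptible(struct amdgpu_device *adev,
                                     struct amdgpu_bo **bo)
{
        struct amdgpu_bo_param bp;

        memset(&bp, 0, sizeof(bp));
        bp.size = SZ_2M;
        bp.domain = AMDGPU_GEM_DOMAIN_GTT;
        bp.flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;
        bp.type = ttm_bo_type_device;
        bp.bo_ptr_size = sizeof(struct amdgpu_bo);  /* plain BO, no user/vm extras */

        return amdgpu_bo_create(adev, &bp, bo);
}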
+ */ +static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); + + drm_printf(printer, "man size:%llu pages, preempt used:%lld pages\n", + man->size, (u64)atomic64_read(&mgr->used)); +} + +static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = { + .alloc = amdgpu_preempt_mgr_new, + .free = amdgpu_preempt_mgr_del, + .debug = amdgpu_preempt_mgr_debug +}; + +/** + * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM + * + * @adev: amdgpu_device pointer + * + * Allocate and initialize the GTT manager. + */ +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + man->use_tt = true; + man->func = &amdgpu_preempt_mgr_func; + + ttm_resource_manager_init(man, (1 << 30)); + + atomic64_set(&mgr->used, 0); + + ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_preempt_used\n"); + return ret; + } + + ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, + &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_preempt_mgr_fini - free and destroy GTT manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the GTT manager, returns -EBUSY if ranges are still + * allocated inside it. + */ +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a09483beb968..9b41cb8c3de5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -24,20 +24,23 @@ */ #include <linux/firmware.h> -#include <linux/dma-mapping.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" +#include "amdgpu_xgmi.h" #include "soc15_common.h" #include "psp_v3_1.h" #include "psp_v10_0.h" #include "psp_v11_0.h" +#include "psp_v11_0_8.h" #include "psp_v12_0.h" #include "psp_v13_0.h" #include "amdgpu_ras.h" #include "amdgpu_securedisplay.h" +#include "amdgpu_atomfirmware.h" static int psp_sysfs_init(struct amdgpu_device *adev); static void psp_sysfs_fini(struct amdgpu_device *adev); @@ -104,6 +107,7 @@ static int psp_early_init(void *handle) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -113,6 +117,16 @@ static int psp_early_init(void *handle) case CHIP_ALDEBARAN: psp_v13_0_set_psp_funcs(psp); break; + case CHIP_YELLOW_CARP: + psp_v13_0_set_psp_funcs(psp); + psp->autoload_supported = true; + break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + psp_v11_0_8_set_psp_funcs(psp); + psp->autoload_supported = false; + } + break; default: return -EINVAL; } @@ -162,11 +176,87 @@ Err_out: return ret; } +/* + * Helper funciton to query psp runtime database entry + * + * @adev: amdgpu_device 
pointer + * @entry_type: the type of psp runtime database entry + * @db_entry: runtime database entry pointer + * + * Return false if runtime database doesn't exit or entry is invalid + * or true if the specific database entry is found, and copy to @db_entry + */ +static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, + enum psp_runtime_entry_type entry_type, + void *db_entry) +{ + uint64_t db_header_pos, db_dir_pos; + struct psp_runtime_data_header db_header = {0}; + struct psp_runtime_data_directory db_dir = {0}; + bool ret = false; + int i; + + db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET; + db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header); + + /* read runtime db header from vram */ + amdgpu_device_vram_access(adev, db_header_pos, (uint32_t *)&db_header, + sizeof(struct psp_runtime_data_header), false); + + if (db_header.cookie != PSP_RUNTIME_DB_COOKIE_ID) { + /* runtime db doesn't exist, exit */ + dev_warn(adev->dev, "PSP runtime database doesn't exist\n"); + return false; + } + + /* read runtime database entry from vram */ + amdgpu_device_vram_access(adev, db_dir_pos, (uint32_t *)&db_dir, + sizeof(struct psp_runtime_data_directory), false); + + if (db_dir.entry_count >= PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT) { + /* invalid db entry count, exit */ + dev_warn(adev->dev, "Invalid PSP runtime database entry count\n"); + return false; + } + + /* look up for requested entry type */ + for (i = 0; i < db_dir.entry_count && !ret; i++) { + if (db_dir.entry_list[i].entry_type == entry_type) { + switch (entry_type) { + case PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG: + if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_boot_cfg_entry)) { + /* invalid db entry size */ + dev_warn(adev->dev, "Invalid PSP runtime database entry size\n"); + return false; + } + /* read runtime database entry */ + amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset, + (uint32_t *)db_entry, sizeof(struct psp_runtime_boot_cfg_entry), false); + ret = true; + break; + default: + ret = false; + break; + } + } + } + + return ret; +} + static int psp_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; int ret; + struct psp_runtime_boot_cfg_entry boot_cfg_entry; + struct psp_memory_training_context *mem_training_ctx = &psp->mem_train_ctx; + + psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!psp->cmd) { + DRM_ERROR("Failed to allocate memory to command buffer!\n"); + ret = -ENOMEM; + } if (!amdgpu_sriov_vf(adev)) { ret = psp_init_microcode(psp); @@ -174,17 +264,47 @@ static int psp_sw_init(void *handle) DRM_ERROR("Failed to load psp firmware!\n"); return ret; } + } else if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_ALDEBARAN) { + ret = psp_init_ta_microcode(psp, "aldebaran"); + if (ret) { + DRM_ERROR("Failed to initialize ta microcode!\n"); + return ret; + } } - ret = psp_memory_training_init(psp); - if (ret) { - DRM_ERROR("Failed to initialize memory training!\n"); - return ret; + memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry)); + if (psp_get_runtime_db_entry(adev, + PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG, + &boot_cfg_entry)) { + psp->boot_cfg_bitmask = boot_cfg_entry.boot_cfg_bitmask; + if ((psp->boot_cfg_bitmask) & + BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING) { + /* If psp runtime database exists, then + * only enable two stage memory training + * when TWO_STAGE_DRAM_TRAINING bit is set + * in runtime database */ + mem_training_ctx->enable_mem_training = true; + 
} + + } else { + /* If psp runtime database doesn't exist or + * is invalid, force enable two stage memory + * training */ + mem_training_ctx->enable_mem_training = true; } - ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); - if (ret) { - DRM_ERROR("Failed to process memory training!\n"); - return ret; + + if (mem_training_ctx->enable_mem_training) { + ret = psp_memory_training_init(psp); + if (ret) { + DRM_ERROR("Failed to initialize memory training!\n"); + return ret; + } + + ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); + if (ret) { + DRM_ERROR("Failed to process memory training!\n"); + return ret; + } } if (adev->asic_type == CHIP_NAVI10 || adev->asic_type == CHIP_SIENNA_CICHLID) { @@ -200,25 +320,30 @@ static int psp_sw_init(void *handle) static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + struct psp_gfx_cmd_resp *cmd = psp->cmd; - psp_memory_training_fini(&adev->psp); - if (adev->psp.sos_fw) { - release_firmware(adev->psp.sos_fw); - adev->psp.sos_fw = NULL; + psp_memory_training_fini(psp); + if (psp->sos_fw) { + release_firmware(psp->sos_fw); + psp->sos_fw = NULL; } - if (adev->psp.asd_fw) { - release_firmware(adev->psp.asd_fw); - adev->psp.asd_fw = NULL; + if (psp->asd_fw) { + release_firmware(psp->asd_fw); + psp->asd_fw = NULL; } - if (adev->psp.ta_fw) { - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; + if (psp->ta_fw) { + release_firmware(psp->ta_fw); + psp->ta_fw = NULL; } if (adev->asic_type == CHIP_NAVI10 || adev->asic_type == CHIP_SIENNA_CICHLID) psp_sysfs_fini(adev); + kfree(cmd); + cmd = NULL; + return 0; } @@ -229,7 +354,7 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, int i; struct amdgpu_device *adev = psp->adev; - if (psp->adev->in_pci_err_recovery) + if (psp->adev->no_hw_access) return 0; for (i = 0; i < adev->usec_timeout; i++) { @@ -247,21 +372,60 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, return -ETIME; } +static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) +{ + switch (cmd_id) { + case GFX_CMD_ID_LOAD_TA: + return "LOAD_TA"; + case GFX_CMD_ID_UNLOAD_TA: + return "UNLOAD_TA"; + case GFX_CMD_ID_INVOKE_CMD: + return "INVOKE_CMD"; + case GFX_CMD_ID_LOAD_ASD: + return "LOAD_ASD"; + case GFX_CMD_ID_SETUP_TMR: + return "SETUP_TMR"; + case GFX_CMD_ID_LOAD_IP_FW: + return "LOAD_IP_FW"; + case GFX_CMD_ID_DESTROY_TMR: + return "DESTROY_TMR"; + case GFX_CMD_ID_SAVE_RESTORE: + return "SAVE_RESTORE_IP_FW"; + case GFX_CMD_ID_SETUP_VMR: + return "SETUP_VMR"; + case GFX_CMD_ID_DESTROY_VMR: + return "DESTROY_VMR"; + case GFX_CMD_ID_PROG_REG: + return "PROG_REG"; + case GFX_CMD_ID_GET_FW_ATTESTATION: + return "GET_FW_ATTESTATION"; + case GFX_CMD_ID_LOAD_TOC: + return "ID_LOAD_TOC"; + case GFX_CMD_ID_AUTOLOAD_RLC: + return "AUTOLOAD_RLC"; + case GFX_CMD_ID_BOOT_CFG: + return "BOOT_CFG"; + default: + return "UNKNOWN CMD"; + } +} + static int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr) { int ret; - int index; + int index, idx; int timeout = 20000; bool ras_intr = false; bool skip_unsupport = false; - if (psp->adev->in_pci_err_recovery) + if (psp->adev->no_hw_access) return 0; - mutex_lock(&psp->mutex); + if (!drm_dev_enter(&psp->adev->ddev, &idx)) + return 0; memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); @@ -271,11 +435,10 @@ psp_cmd_submit_buf(struct psp_context *psp, ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, 
fence_mc_addr, index); if (ret) { atomic_dec(&psp->fence_value); - mutex_unlock(&psp->mutex); - return ret; + goto exit; } - amdgpu_asic_invalidate_hdp(psp->adev, NULL); + amdgpu_device_invalidate_hdp(psp->adev, NULL); while (*((unsigned int *)psp->fence_buf) != index) { if (--timeout == 0) break; @@ -288,7 +451,7 @@ psp_cmd_submit_buf(struct psp_context *psp, if (ras_intr) break; usleep_range(10, 100); - amdgpu_asic_invalidate_hdp(psp->adev, NULL); + amdgpu_device_invalidate_hdp(psp->adev, NULL); } /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */ @@ -306,14 +469,14 @@ psp_cmd_submit_buf(struct psp_context *psp, */ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) { if (ucode) - DRM_WARN("failed to load ucode id (%d) ", - ucode->ucode_id); - DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n", - psp->cmd_buf_mem->cmd_id, + DRM_WARN("failed to load ucode %s(0x%X) ", + amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); + DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, psp->cmd_buf_mem->resp.status); if (!timeout) { - mutex_unlock(&psp->mutex); - return -EINVAL; + ret = -EINVAL; + goto exit; } } @@ -321,11 +484,28 @@ psp_cmd_submit_buf(struct psp_context *psp, ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; } - mutex_unlock(&psp->mutex); +exit: + drm_dev_exit(idx); return ret; } +static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp) +{ + struct psp_gfx_cmd_resp *cmd = psp->cmd; + + mutex_lock(&psp->mutex); + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + + return cmd; +} + +void release_psp_cmd_buf(struct psp_context *psp) +{ + mutex_unlock(&psp->mutex); +} + static void psp_prep_tmr_cmd_buf(struct psp_context *psp, struct psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, struct amdgpu_bo *tmr_bo) @@ -360,22 +540,20 @@ static int psp_load_toc(struct psp_context *psp, uint32_t *tmr_size) { int ret; - struct psp_gfx_cmd_resp *cmd; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; /* Copy toc to psp firmware private buffer */ - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->toc_start_addr, psp->toc_bin_size); + psp_copy_fw(psp, psp->toc.start_addr, psp->toc.size_bytes); - psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc_bin_size); + psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc.size_bytes); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) *tmr_size = psp->cmd_buf_mem->resp.tmr_size; - kfree(cmd); + + release_psp_cmd_buf(psp); + return ret; } @@ -399,8 +577,8 @@ static int psp_tmr_init(struct psp_context *psp) /* For ASICs support RLC autoload, psp will parse the toc * and calculate the total size of TMR needed */ if (!amdgpu_sriov_vf(psp->adev) && - psp->toc_start_addr && - psp->toc_bin_size && + psp->toc.start_addr && + psp->toc.size_bytes && psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); if (ret) { @@ -417,31 +595,12 @@ static int psp_tmr_init(struct psp_context *psp) return ret; } -static int psp_clear_vf_fw(struct psp_context *psp) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if 
(!cmd) - return -ENOMEM; - - cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW; - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); - - return ret; -} - static bool psp_skip_tmr(struct psp_context *psp) { switch (psp->adev->asic_type) { case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_ALDEBARAN: return true; default: return false; @@ -459,9 +618,7 @@ static int psp_tmr_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", @@ -470,13 +627,13 @@ static int psp_tmr_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, - struct psp_gfx_cmd_resp *cmd) + struct psp_gfx_cmd_resp *cmd) { if (amdgpu_sriov_vf(psp->adev)) cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR; @@ -487,11 +644,7 @@ static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, static int psp_tmr_unload(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_unload_cmd_buf(psp, cmd); DRM_INFO("free PSP TMR buffer\n"); @@ -499,7 +652,7 @@ static int psp_tmr_unload(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -533,9 +686,7 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_GET_FW_ATTESTATION; @@ -547,49 +698,84 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_hi << 32); } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } -static int psp_boot_config_set(struct amdgpu_device *adev) +static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg) { struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd = psp->cmd; + struct psp_gfx_cmd_resp *cmd; + int ret; + + if (amdgpu_sriov_vf(adev)) + return 0; + + cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = GFX_CMD_ID_BOOT_CFG; + cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_GET; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (!ret) { + *boot_cfg = + (cmd->resp.uresp.boot_cfg.boot_cfg & BOOT_CONFIG_GECC) ? 
1 : 0; + } + + release_psp_cmd_buf(psp); + + return ret; +} - if (adev->asic_type != CHIP_SIENNA_CICHLID || amdgpu_sriov_vf(adev)) +static int psp_boot_config_set(struct amdgpu_device *adev, uint32_t boot_cfg) +{ + int ret; + struct psp_context *psp = &adev->psp; + struct psp_gfx_cmd_resp *cmd; + + if (amdgpu_sriov_vf(adev)) return 0; - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_BOOT_CFG; cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_SET; - cmd->cmd.boot_cfg.boot_config = BOOT_CONFIG_GECC; - cmd->cmd.boot_cfg.boot_config_valid = BOOT_CONFIG_GECC; + cmd->cmd.boot_cfg.boot_config = boot_cfg; + cmd->cmd.boot_cfg.boot_config_valid = boot_cfg; - return psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + release_psp_cmd_buf(psp); + + return ret; } static int psp_rl_load(struct amdgpu_device *adev) { + int ret; struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd = psp->cmd; + struct psp_gfx_cmd_resp *cmd; - if (psp->rl_bin_size == 0) + if (!is_psp_fw_valid(psp->rl)) return 0; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->rl_start_addr, psp->rl_bin_size); + cmd = acquire_psp_cmd_buf(psp); - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->rl.start_addr, psp->rl.size_bytes); cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(psp->fw_pri_mc_addr); cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(psp->fw_pri_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl_bin_size; + cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl.size_bytes; cmd->cmd.cmd_load_ip_fw.fw_type = GFX_FW_TYPE_REG_LIST; - return psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + release_psp_cmd_buf(psp); + + return ret; } static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, @@ -614,18 +800,15 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. 
* TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size) + if (amdgpu_sriov_vf(psp->adev) || !psp->asd.size_bytes) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size); + psp_copy_fw(psp, psp->asd.start_addr, psp->asd.size_bytes); psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->asd_ucode_size); + psp->asd.size_bytes); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -634,7 +817,7 @@ static int psp_asd_load(struct psp_context *psp) psp->asd_context.session_id = cmd->resp.session_id; } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -657,9 +840,7 @@ static int psp_asd_unload(struct psp_context *psp) if (!psp->asd_context.asd_initialized) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id); @@ -668,7 +849,7 @@ static int psp_asd_unload(struct psp_context *psp) if (!ret) psp->asd_context.asd_initialized = false; - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -684,20 +865,21 @@ static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd, int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, uint32_t value) { - struct psp_gfx_cmd_resp *cmd = NULL; + struct psp_gfx_cmd_resp *cmd; int ret = 0; if (reg >= PSP_REG_LAST) return -EINVAL; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); psp_prep_reg_prog_cmd_buf(cmd, reg, value); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (ret) + DRM_ERROR("PSP failed to program reg id %d", reg); + + release_psp_cmd_buf(psp); - kfree(cmd); return ret; } @@ -717,23 +899,37 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size; } -static int psp_xgmi_init_shared_buf(struct psp_context *psp) +static int psp_ta_init_shared_buf(struct psp_context *psp, + struct ta_mem_context *mem_ctx, + uint32_t shared_mem_size) { int ret; /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for xgmi ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->xgmi_context.xgmi_shared_bo, - &psp->xgmi_context.xgmi_shared_mc_addr, - &psp->xgmi_context.xgmi_shared_buf); + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for ta to host memory + */ + ret = amdgpu_bo_create_kernel(psp->adev, shared_mem_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &mem_ctx->shared_bo, + &mem_ctx->shared_mc_addr, + &mem_ctx->shared_buf); return ret; } +static void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) +{ + amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, + &mem_ctx->shared_buf); +} + +static int psp_xgmi_init_shared_buf(struct psp_context *psp) +{ + return psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context, + PSP_XGMI_SHARED_MEM_SIZE); +} + static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) @@ -748,18 +944,14 @@ static int psp_ta_invoke(struct psp_context *psp, uint32_t session_id) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct 
psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -773,28 +965,25 @@ static int psp_xgmi_load(struct psp_context *psp) * TODO: bypass the loading in sriov for now */ - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); + psp_copy_fw(psp, psp->xgmi.start_addr, psp->xgmi.size_bytes); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_xgmi_ucode_size, - psp->xgmi_context.xgmi_shared_mc_addr, + psp->xgmi.size_bytes, + psp->xgmi_context.context.mem_context.shared_mc_addr, PSP_XGMI_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->xgmi_context.initialized = 1; - psp->xgmi_context.session_id = cmd->resp.session_id; + psp->xgmi_context.context.initialized = true; + psp->xgmi_context.context.session_id = cmd->resp.session_id; } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -814,57 +1003,56 @@ static int psp_xgmi_unload(struct psp_context *psp) * TODO: bypass the unloading in sriov for now */ - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id); + return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.context.session_id); } int psp_xgmi_terminate(struct psp_context *psp) { int ret; - if (!psp->xgmi_context.initialized) + if (!psp->xgmi_context.context.initialized) return 0; ret = psp_xgmi_unload(psp); if (ret) return ret; - psp->xgmi_context.initialized = 0; + psp->xgmi_context.context.initialized = false; /* free xgmi shared memory */ - amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo, - &psp->xgmi_context.xgmi_shared_mc_addr, - &psp->xgmi_context.xgmi_shared_buf); + psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); return 0; } -int psp_xgmi_initialize(struct psp_context *psp) +int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta) { struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw || - !psp->adev->psp.ta_xgmi_ucode_size || - !psp->adev->psp.ta_xgmi_start_addr) + if (!psp->ta_fw || + !psp->xgmi.size_bytes || + !psp->xgmi.start_addr) return -ENOENT; - if (!psp->xgmi_context.initialized) { + if (!load_ta) + goto invoke; + + if (!psp->xgmi_context.context.initialized) { ret = psp_xgmi_init_shared_buf(psp); if (ret) return ret; @@ -875,9 +1063,11 @@ int psp_xgmi_initialize(struct psp_context *psp) if (ret) return ret; +invoke: /* Initialize XGMI session */ - xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf); + xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.context.mem_context.shared_buf); memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + xgmi_cmd->flag_extend_link_record = set_extended_data; 
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); @@ -890,7 +1080,7 @@ int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; @@ -910,7 +1100,7 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; @@ -925,9 +1115,62 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) return 0; } +static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp) +{ + return psp->adev->asic_type == CHIP_ALDEBARAN && + psp->xgmi.feature_version >= 0x2000000b; +} + +/* + * Chips that support extended topology information require the driver to + * reflect topology information in the opposite direction. This is + * because the TA has already exceeded its link record limit and if the + * TA holds bi-directional information, the driver would have to do + * multiple fetches instead of just two. + */ +static void psp_xgmi_reflect_topology_info(struct psp_context *psp, + struct psp_xgmi_node_info node_info) +{ + struct amdgpu_device *mirror_adev; + struct amdgpu_hive_info *hive; + uint64_t src_node_id = psp->adev->gmc.xgmi.node_id; + uint64_t dst_node_id = node_info.node_id; + uint8_t dst_num_hops = node_info.num_hops; + uint8_t dst_num_links = node_info.num_links; + + hive = amdgpu_get_xgmi_hive(psp->adev); + list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) { + struct psp_xgmi_topology_info *mirror_top_info; + int j; + + if (mirror_adev->gmc.xgmi.node_id != dst_node_id) + continue; + + mirror_top_info = &mirror_adev->psp.xgmi_context.top_info; + for (j = 0; j < mirror_top_info->num_nodes; j++) { + if (mirror_top_info->nodes[j].node_id != src_node_id) + continue; + + mirror_top_info->nodes[j].num_hops = dst_num_hops; + /* + * prevent 0 num_links value re-reflection since reflection + * criteria is based on num_hops (direct or indirect). 
+ * + */ + if (dst_num_links) + mirror_top_info->nodes[j].num_links = dst_num_links; + + break; + } + + break; + } +} + int psp_xgmi_get_topology_info(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology) + struct psp_xgmi_topology_info *topology, + bool get_extended_data) { struct ta_xgmi_shared_memory *xgmi_cmd; struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; @@ -938,8 +1181,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) return -EINVAL; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + xgmi_cmd->flag_extend_link_record = get_extended_data; /* Fill in the shared memory with topology information as input */ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; @@ -962,10 +1206,45 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info; topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes; for (i = 0; i < topology->num_nodes; i++) { - topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; - topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; - topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled; - topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; + /* extended data will either be 0 or equal to non-extended data */ + if (topology_info_output->nodes[i].num_hops) + topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; + + /* non-extended data gets everything here so no need to update */ + if (!get_extended_data) { + topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; + topology->nodes[i].is_sharing_enabled = + topology_info_output->nodes[i].is_sharing_enabled; + topology->nodes[i].sdma_engine = + topology_info_output->nodes[i].sdma_engine; + } + + } + + /* Invoke xgmi ta again to get the link information */ + if (psp_xgmi_peer_link_info_supported(psp)) { + struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); + + if (ret) + return ret; + + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; + for (i = 0; i < topology->num_nodes; i++) { + /* accumulate num_links on extended data */ + topology->nodes[i].num_links = get_extended_data ? 
+ topology->nodes[i].num_links + + link_info_output->nodes[i].num_links : + link_info_output->nodes[i].num_links; + + /* reflect the topology information for bi-directionality */ + if (psp->xgmi_context.supports_extended_data && + get_extended_data && topology->nodes[i].num_hops) + psp_xgmi_reflect_topology_info(psp, topology->nodes[i]); + } } return 0; @@ -982,7 +1261,7 @@ int psp_xgmi_set_topology_info(struct psp_context *psp, if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) return -EINVAL; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; @@ -1003,19 +1282,8 @@ int psp_xgmi_set_topology_info(struct psp_context *psp, // ras begin static int psp_ras_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for ras ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAS_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->ras.ras_shared_bo, - &psp->ras.ras_shared_mc_addr, - &psp->ras.ras_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context, + PSP_RAS_SHARED_MEM_SIZE); } static int psp_ras_load(struct psp_context *psp) @@ -1030,38 +1298,40 @@ static int psp_ras_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + psp_copy_fw(psp, psp->ras.start_addr, psp->ras.size_bytes); - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + + if (psp->adev->gmc.xgmi.connected_to_cpu) + ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; + else + ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; + + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_ras_ucode_size, - psp->ras.ras_shared_mc_addr, + psp->ras.size_bytes, + psp->ras_context.context.mem_context.shared_mc_addr, PSP_RAS_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - if (!ret) { - psp->ras.session_id = cmd->resp.session_id; + psp->ras_context.context.session_id = cmd->resp.session_id; if (!ras_cmd->ras_status) - psp->ras.ras_initialized = true; + psp->ras_context.context.initialized = true; else dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); } + release_psp_cmd_buf(psp); + if (ret || ras_cmd->ras_status) amdgpu_ras_fini(psp->adev); - kfree(cmd); - return ret; } @@ -1076,16 +1346,14 @@ static int psp_ras_unload(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->ras_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1095,7 +1363,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) struct ta_ras_shared_memory *ras_cmd; int ret; - ras_cmd = 
(struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; /* * TODO: bypass the loading in sriov for now @@ -1103,7 +1371,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; - ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id); + ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras_context.context.session_id); if (amdgpu_ras_intr_triggered()) return ret; @@ -1128,16 +1396,41 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return ret; } +static int psp_ras_status_to_errno(struct amdgpu_device *adev, + enum ta_ras_status ras_status) +{ + int ret = -EINVAL; + + switch (ras_status) { + case TA_RAS_STATUS__SUCCESS: + ret = 0; + break; + case TA_RAS_STATUS__RESET_NEEDED: + ret = -EAGAIN; + break; + case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE: + dev_warn(adev->dev, "RAS WARN: ras function unavailable\n"); + break; + case TA_RAS_STATUS__ERROR_ASD_READ_WRITE: + dev_warn(adev->dev, "RAS WARN: asd read or write failed\n"); + break; + default: + dev_err(adev->dev, "RAS ERROR: ras function failed ret 0x%X\n", ret); + } + + return ret; +} + int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable) { struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.context.initialized) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); if (enable) @@ -1151,7 +1444,7 @@ int psp_ras_enable_features(struct psp_context *psp, if (ret) return -EINVAL; - return ras_cmd->ras_status; + return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status); } static int psp_ras_terminate(struct psp_context *psp) @@ -1164,19 +1457,17 @@ static int psp_ras_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.context.initialized) return 0; ret = psp_ras_unload(psp); if (ret) return ret; - psp->ras.ras_initialized = false; + psp->ras_context.context.initialized = false; /* free ras shared memory */ - amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo, - &psp->ras.ras_shared_mc_addr, - &psp->ras.ras_shared_buf); + psp_ta_free_shared_buf(&psp->ras_context.context.mem_context); return 0; } @@ -1184,20 +1475,63 @@ static int psp_ras_terminate(struct psp_context *psp) static int psp_ras_initialize(struct psp_context *psp) { int ret; + uint32_t boot_cfg = 0xFF; + struct amdgpu_device *adev = psp->adev; /* * TODO: bypass the initialize in sriov for now */ - if (amdgpu_sriov_vf(psp->adev)) + if (amdgpu_sriov_vf(adev)) return 0; - if (!psp->adev->psp.ta_ras_ucode_size || - !psp->adev->psp.ta_ras_start_addr) { - dev_info(psp->adev->dev, "RAS: optional ras ta ucode is not available\n"); + if (!adev->psp.ras.size_bytes || + !adev->psp.ras.start_addr) { + dev_info(adev->dev, "RAS: optional ras ta ucode is not available\n"); return 0; } - if (!psp->ras.ras_initialized) { + if (amdgpu_atomfirmware_dynamic_boot_config_supported(adev)) { + /* query GECC enablement status from boot config + * boot_cfg: 1: GECC is enabled or 0: GECC is disabled + */ + ret = psp_boot_config_get(adev, &boot_cfg); + if (ret) + dev_warn(adev->dev, "PSP get boot config failed\n"); + + if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) { + if (!boot_cfg) { + 
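The GECC handling here boils down to a small decision: when UMC RAS is disabled by the module parameters, make sure GECC is off for the next boot; when it is enabled, make sure GECC is on, forcing it on if the boot-config query failed (boot_cfg keeps its 0xFF sentinel in that case). A compilable userspace model of that decision, with hypothetical names and the patch's bit value for GECC:

#include <stdbool.h>
#include <stdio.h>

#define GECC_BIT	0x1	/* mirrors BOOT_CFG_FEATURE_GECC in the patch */
#define CFG_UNKNOWN	0xFF	/* sentinel left in place when the query fails */

/* Return the bitmask to program for the next boot, or -1 for "leave as is". */
static int gecc_next_boot_cfg(bool umc_ras_enabled, unsigned int boot_cfg)
{
	if (!umc_ras_enabled)
		return boot_cfg ? 0 : -1;	/* turn GECC off unless it already is off */

	return (boot_cfg == 1) ? -1 : GECC_BIT;	/* turn GECC on, forcing it after a failed query */
}

int main(void)
{
	printf("%d\n", gecc_next_boot_cfg(true, 0));		/* 1: enable in next boot cycle */
	printf("%d\n", gecc_next_boot_cfg(false, CFG_UNKNOWN));	/* 0: disable in next boot cycle */
	printf("%d\n", gecc_next_boot_cfg(true, 1));		/* -1: already enabled, no change */
	return 0;
}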
dev_info(adev->dev, "GECC is disabled\n"); + } else { + /* disable GECC in next boot cycle if ras is + * disabled by module parameter amdgpu_ras_enable + * and/or amdgpu_ras_mask, or boot_config_get call + * is failed + */ + ret = psp_boot_config_set(adev, 0); + if (ret) + dev_warn(adev->dev, "PSP set boot config failed\n"); + else + dev_warn(adev->dev, "GECC will be disabled in next boot cycle " + "if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n"); + } + } else { + if (1 == boot_cfg) { + dev_info(adev->dev, "GECC is enabled\n"); + } else { + /* enable GECC in next boot cycle if it is disabled + * in boot config, or force enable GECC if failed to + * get boot configuration + */ + ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC); + if (ret) + dev_warn(adev->dev, "PSP set boot config failed\n"); + else + dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n"); + } + } + } + + if (!psp->ras_context.context.initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) return ret; @@ -1216,10 +1550,10 @@ int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.context.initialized) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; @@ -1234,26 +1568,15 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - return ras_cmd->ras_status; + return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status); } // ras end // HDCP start static int psp_hdcp_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for hdcp ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->hdcp_context.hdcp_shared_bo, - &psp->hdcp_context.hdcp_shared_mc_addr, - &psp->hdcp_context.hdcp_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context, + PSP_HDCP_SHARED_MEM_SIZE); } static int psp_hdcp_load(struct psp_context *psp) @@ -1267,29 +1590,26 @@ static int psp_hdcp_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + psp_copy_fw(psp, psp->hdcp.start_addr, + psp->hdcp.size_bytes); - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr, - psp->ta_hdcp_ucode_size); + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_hdcp_ucode_size, - psp->hdcp_context.hdcp_shared_mc_addr, + psp->hdcp.size_bytes, + psp->hdcp_context.context.mem_context.shared_mc_addr, PSP_HDCP_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->hdcp_context.hdcp_initialized = true; - psp->hdcp_context.session_id = cmd->resp.session_id; + psp->hdcp_context.context.initialized = true; + psp->hdcp_context.context.session_id = cmd->resp.session_id; mutex_init(&psp->hdcp_context.mutex); } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1303,13 +1623,13 @@ static int psp_hdcp_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_hdcp_ucode_size || - 
!psp->adev->psp.ta_hdcp_start_addr) { + if (!psp->hdcp.size_bytes || + !psp->hdcp.start_addr) { dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n"); return 0; } - if (!psp->hdcp_context.hdcp_initialized) { + if (!psp->hdcp_context.context.initialized) { ret = psp_hdcp_init_shared_buf(psp); if (ret) return ret; @@ -1333,15 +1653,13 @@ static int psp_hdcp_unload(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1354,7 +1672,7 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; - return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id); + return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.context.session_id); } static int psp_hdcp_terminate(struct psp_context *psp) @@ -1367,8 +1685,8 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->hdcp_context.hdcp_initialized) { - if (psp->hdcp_context.hdcp_shared_buf) + if (!psp->hdcp_context.context.initialized) { + if (psp->hdcp_context.context.mem_context.shared_buf) goto out; else return 0; @@ -1378,13 +1696,11 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (ret) return ret; - psp->hdcp_context.hdcp_initialized = false; + psp->hdcp_context.context.initialized = false; out: /* free hdcp shared memory */ - amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, - &psp->hdcp_context.hdcp_shared_mc_addr, - &psp->hdcp_context.hdcp_shared_buf); + psp_ta_free_shared_buf(&psp->hdcp_context.context.mem_context); return 0; } @@ -1393,19 +1709,8 @@ out: // DTM start static int psp_dtm_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for dtm ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->dtm_context.dtm_shared_bo, - &psp->dtm_context.dtm_shared_mc_addr, - &psp->dtm_context.dtm_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context, + PSP_DTM_SHARED_MEM_SIZE); } static int psp_dtm_load(struct psp_context *psp) @@ -1419,28 +1724,25 @@ static int psp_dtm_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + psp_copy_fw(psp, psp->dtm.start_addr, psp->dtm.size_bytes); - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_dtm_ucode_size, - psp->dtm_context.dtm_shared_mc_addr, + psp->dtm.size_bytes, + psp->dtm_context.context.mem_context.shared_mc_addr, PSP_DTM_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->dtm_context.dtm_initialized = true; - psp->dtm_context.session_id = cmd->resp.session_id; + psp->dtm_context.context.initialized = true; + psp->dtm_context.context.session_id = cmd->resp.session_id; mutex_init(&psp->dtm_context.mutex); } - kfree(cmd); + 
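Throughout these hunks the per-call kzalloc()/kfree() of the GFX command buffer is replaced by acquire_psp_cmd_buf()/release_psp_cmd_buf(), so every TA load and unload borrows one preallocated buffer under a lock instead of allocating its own. The helpers themselves are not in this hunk; a plausible userspace model of the pattern, with hypothetical names, looks like this:

#include <pthread.h>
#include <string.h>

struct cmd_buf {
	unsigned char payload[4096];	/* stand-in for struct psp_gfx_cmd_resp */
};

static struct cmd_buf one_cmd_buf;	/* allocated once, reused for every command */
static pthread_mutex_t cmd_lock = PTHREAD_MUTEX_INITIALIZER;

/* Borrow the shared buffer: serialize callers and hand it back zeroed. */
static struct cmd_buf *acquire_cmd_buf(void)
{
	pthread_mutex_lock(&cmd_lock);
	memset(&one_cmd_buf, 0, sizeof(one_cmd_buf));
	return &one_cmd_buf;
}

static void release_cmd_buf(void)
{
	pthread_mutex_unlock(&cmd_lock);
}

Every call site then follows the same shape: acquire, fill in the command, submit, release, which is why the error paths in these hunks no longer need kfree().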
release_psp_cmd_buf(psp); return ret; } @@ -1455,13 +1757,13 @@ static int psp_dtm_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_dtm_ucode_size || - !psp->adev->psp.ta_dtm_start_addr) { + if (!psp->dtm.size_bytes || + !psp->dtm.start_addr) { dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n"); return 0; } - if (!psp->dtm_context.dtm_initialized) { + if (!psp->dtm_context.context.initialized) { ret = psp_dtm_init_shared_buf(psp); if (ret) return ret; @@ -1485,15 +1787,13 @@ static int psp_dtm_unload(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->dtm_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->dtm_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1506,7 +1806,7 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; - return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id); + return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.context.session_id); } static int psp_dtm_terminate(struct psp_context *psp) @@ -1519,8 +1819,8 @@ static int psp_dtm_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->dtm_context.dtm_initialized) { - if (psp->dtm_context.dtm_shared_buf) + if (!psp->dtm_context.context.initialized) { + if (psp->dtm_context.context.mem_context.shared_buf) goto out; else return 0; @@ -1530,13 +1830,11 @@ static int psp_dtm_terminate(struct psp_context *psp) if (ret) return ret; - psp->dtm_context.dtm_initialized = false; + psp->dtm_context.context.initialized = false; out: - /* free hdcp shared memory */ - amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, - &psp->dtm_context.dtm_shared_mc_addr, - &psp->dtm_context.dtm_shared_buf); + /* free dtm shared memory */ + psp_ta_free_shared_buf(&psp->dtm_context.context.mem_context); return 0; } @@ -1545,19 +1843,8 @@ out: // RAP start static int psp_rap_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for rap ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAP_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->rap_context.rap_shared_bo, - &psp->rap_context.rap_shared_mc_addr, - &psp->rap_context.rap_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context, + PSP_RAP_SHARED_MEM_SIZE); } static int psp_rap_load(struct psp_context *psp) @@ -1565,28 +1852,25 @@ static int psp_rap_load(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + psp_copy_fw(psp, psp->rap.start_addr, psp->rap.size_bytes); - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_rap_start_addr, psp->ta_rap_ucode_size); + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_rap_ucode_size, - psp->rap_context.rap_shared_mc_addr, + psp->rap.size_bytes, + psp->rap_context.context.mem_context.shared_mc_addr, PSP_RAP_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->rap_context.rap_initialized = true; - 
psp->rap_context.session_id = cmd->resp.session_id; + psp->rap_context.context.initialized = true; + psp->rap_context.context.session_id = cmd->resp.session_id; mutex_init(&psp->rap_context.mutex); } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1594,17 +1878,13 @@ static int psp_rap_load(struct psp_context *psp) static int psp_rap_unload(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_ta_unload_cmd_buf(cmd, psp->rap_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->rap_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1620,13 +1900,13 @@ static int psp_rap_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_rap_ucode_size || - !psp->adev->psp.ta_rap_start_addr) { + if (!psp->rap.size_bytes || + !psp->rap.start_addr) { dev_info(psp->adev->dev, "RAP: optional rap ta ucode is not available\n"); return 0; } - if (!psp->rap_context.rap_initialized) { + if (!psp->rap_context.context.initialized) { ret = psp_rap_init_shared_buf(psp); if (ret) return ret; @@ -1640,11 +1920,9 @@ static int psp_rap_initialize(struct psp_context *psp) if (ret || status != TA_RAP_STATUS__SUCCESS) { psp_rap_unload(psp); - amdgpu_bo_free_kernel(&psp->rap_context.rap_shared_bo, - &psp->rap_context.rap_shared_mc_addr, - &psp->rap_context.rap_shared_buf); + psp_ta_free_shared_buf(&psp->rap_context.context.mem_context); - psp->rap_context.rap_initialized = false; + psp->rap_context.context.initialized = false; dev_warn(psp->adev->dev, "RAP TA initialize fail (%d) status %d.\n", ret, status); @@ -1659,17 +1937,15 @@ static int psp_rap_terminate(struct psp_context *psp) { int ret; - if (!psp->rap_context.rap_initialized) + if (!psp->rap_context.context.initialized) return 0; ret = psp_rap_unload(psp); - psp->rap_context.rap_initialized = false; + psp->rap_context.context.initialized = false; /* free rap shared memory */ - amdgpu_bo_free_kernel(&psp->rap_context.rap_shared_bo, - &psp->rap_context.rap_shared_mc_addr, - &psp->rap_context.rap_shared_buf); + psp_ta_free_shared_buf(&psp->rap_context.context.mem_context); return ret; } @@ -1679,7 +1955,7 @@ int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_stat struct ta_rap_shared_memory *rap_cmd; int ret = 0; - if (!psp->rap_context.rap_initialized) + if (!psp->rap_context.context.initialized) return 0; if (ta_cmd_id != TA_CMD_RAP__INITIALIZE && @@ -1689,13 +1965,13 @@ int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_stat mutex_lock(&psp->rap_context.mutex); rap_cmd = (struct ta_rap_shared_memory *) - psp->rap_context.rap_shared_buf; + psp->rap_context.context.mem_context.shared_buf; memset(rap_cmd, 0, sizeof(struct ta_rap_shared_memory)); rap_cmd->cmd_id = ta_cmd_id; rap_cmd->validation_method_id = METHOD_A; - ret = psp_ta_invoke(psp, rap_cmd->cmd_id, psp->rap_context.session_id); + ret = psp_ta_invoke(psp, rap_cmd->cmd_id, psp->rap_context.context.session_id); if (ret) goto out_unlock; @@ -1712,67 +1988,48 @@ out_unlock: /* securedisplay start */ static int psp_securedisplay_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for sa ta <-> Driver - */ - ret = 
amdgpu_bo_create_kernel(psp->adev, PSP_SECUREDISPLAY_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->securedisplay_context.securedisplay_shared_bo, - &psp->securedisplay_context.securedisplay_shared_mc_addr, - &psp->securedisplay_context.securedisplay_shared_buf); - - return ret; + return psp_ta_init_shared_buf( + psp, &psp->securedisplay_context.context.mem_context, + PSP_SECUREDISPLAY_SHARED_MEM_SIZE); } static int psp_securedisplay_load(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_securedisplay_start_addr, psp->ta_securedisplay_ucode_size); + memcpy(psp->fw_pri_buf, psp->securedisplay.start_addr, psp->securedisplay.size_bytes); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_securedisplay_ucode_size, - psp->securedisplay_context.securedisplay_shared_mc_addr, + psp->securedisplay.size_bytes, + psp->securedisplay_context.context.mem_context.shared_mc_addr, PSP_SECUREDISPLAY_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - if (ret) - goto failed; + if (!ret) { + psp->securedisplay_context.context.initialized = true; + psp->securedisplay_context.context.session_id = cmd->resp.session_id; + mutex_init(&psp->securedisplay_context.mutex); + } - psp->securedisplay_context.securedisplay_initialized = true; - psp->securedisplay_context.session_id = cmd->resp.session_id; - mutex_init(&psp->securedisplay_context.mutex); + release_psp_cmd_buf(psp); -failed: - kfree(cmd); return ret; } static int psp_securedisplay_unload(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->securedisplay_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->securedisplay_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1788,13 +2045,13 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_securedisplay_ucode_size || - !psp->adev->psp.ta_securedisplay_start_addr) { + if (!psp->securedisplay.size_bytes || + !psp->securedisplay.start_addr) { dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); return 0; } - if (!psp->securedisplay_context.securedisplay_initialized) { + if (!psp->securedisplay_context.context.initialized) { ret = psp_securedisplay_init_shared_buf(psp); if (ret) return ret; @@ -1811,11 +2068,9 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (ret) { psp_securedisplay_unload(psp); - amdgpu_bo_free_kernel(&psp->securedisplay_context.securedisplay_shared_bo, - &psp->securedisplay_context.securedisplay_shared_mc_addr, - &psp->securedisplay_context.securedisplay_shared_buf); + psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context); - psp->securedisplay_context.securedisplay_initialized = false; + psp->securedisplay_context.context.initialized = false; dev_err(psp->adev->dev, "SECUREDISPLAY TA initialize fail.\n"); return -EINVAL; @@ -1840,19 +2095,17 @@ static int psp_securedisplay_terminate(struct psp_context *psp) if 
(amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->securedisplay_context.securedisplay_initialized) + if (!psp->securedisplay_context.context.initialized) return 0; ret = psp_securedisplay_unload(psp); if (ret) return ret; - psp->securedisplay_context.securedisplay_initialized = false; + psp->securedisplay_context.context.initialized = false; /* free securedisplay shared memory */ - amdgpu_bo_free_kernel(&psp->securedisplay_context.securedisplay_shared_bo, - &psp->securedisplay_context.securedisplay_shared_mc_addr, - &psp->securedisplay_context.securedisplay_shared_buf); + psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context); return ret; } @@ -1861,7 +2114,7 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { int ret; - if (!psp->securedisplay_context.securedisplay_initialized) + if (!psp->securedisplay_context.context.initialized) return -EINVAL; if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA && @@ -1870,7 +2123,7 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) mutex_lock(&psp->securedisplay_context.mutex); - ret = psp_ta_invoke(psp, ta_cmd_id, psp->securedisplay_context.session_id); + ret = psp_ta_invoke(psp, ta_cmd_id, psp->securedisplay_context.context.session_id); mutex_unlock(&psp->securedisplay_context.mutex); @@ -1884,7 +2137,7 @@ static int psp_hw_start(struct psp_context *psp) int ret; if (!amdgpu_sriov_vf(adev)) { - if (psp->kdb_bin_size && + if ((is_psp_fw_valid(psp->kdb)) && (psp->funcs->bootloader_load_kdb != NULL)) { ret = psp_bootloader_load_kdb(psp); if (ret) { @@ -1893,7 +2146,8 @@ static int psp_hw_start(struct psp_context *psp) } } - if (psp->spl_bin_size) { + if ((is_psp_fw_valid(psp->spl)) && + (psp->funcs->bootloader_load_spl != NULL)) { ret = psp_bootloader_load_spl(psp); if (ret) { DRM_ERROR("PSP load spl failed!\n"); @@ -1901,16 +2155,49 @@ static int psp_hw_start(struct psp_context *psp) } } - ret = psp_bootloader_load_sysdrv(psp); - if (ret) { - DRM_ERROR("PSP load sysdrv failed!\n"); - return ret; + if ((is_psp_fw_valid(psp->sys)) && + (psp->funcs->bootloader_load_sysdrv != NULL)) { + ret = psp_bootloader_load_sysdrv(psp); + if (ret) { + DRM_ERROR("PSP load sys drv failed!\n"); + return ret; + } } - ret = psp_bootloader_load_sos(psp); - if (ret) { - DRM_ERROR("PSP load sos failed!\n"); - return ret; + if ((is_psp_fw_valid(psp->soc_drv)) && + (psp->funcs->bootloader_load_soc_drv != NULL)) { + ret = psp_bootloader_load_soc_drv(psp); + if (ret) { + DRM_ERROR("PSP load soc drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->intf_drv)) && + (psp->funcs->bootloader_load_intf_drv != NULL)) { + ret = psp_bootloader_load_intf_drv(psp); + if (ret) { + DRM_ERROR("PSP load intf drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->dbg_drv)) && + (psp->funcs->bootloader_load_dbg_drv != NULL)) { + ret = psp_bootloader_load_dbg_drv(psp); + if (ret) { + DRM_ERROR("PSP load dbg drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->sos)) && + (psp->funcs->bootloader_load_sos != NULL)) { + ret = psp_bootloader_load_sos(psp); + if (ret) { + DRM_ERROR("PSP load sos failed!\n"); + return ret; + } } } @@ -1920,17 +2207,6 @@ static int psp_hw_start(struct psp_context *psp) return ret; } - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } - - ret = psp_boot_config_set(adev); - if (ret) { - DRM_WARN("PSP set boot config@\n"); - } - ret = psp_tmr_init(psp); if (ret) { DRM_ERROR("PSP tmr init failed!\n"); @@ -2125,8 
+2401,6 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, int ret; uint64_t fw_mem_mc_addr = ucode->mc_addr; - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); - cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); @@ -2139,17 +2413,19 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, return ret; } -static int psp_execute_np_fw_load(struct psp_context *psp, +static int psp_execute_non_psp_fw_load(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { int ret = 0; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd); - if (ret) - return ret; + ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd); + if (!ret) { + ret = psp_cmd_submit_buf(psp, ucode, cmd, + psp->fence_buf_mc_addr); + } - ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, - psp->fence_buf_mc_addr); + release_psp_cmd_buf(psp); return ret; } @@ -2160,25 +2436,22 @@ static int psp_load_smu_fw(struct psp_context *psp) struct amdgpu_device *adev = psp->adev; struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; - struct amdgpu_ras *ras = psp->ras.ras; + struct amdgpu_ras *ras = psp->ras_context.ras; if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; if ((amdgpu_in_reset(adev) && - ras && ras->supported && + ras && adev->ras_enabled && (adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_VEGA20)) || - (adev->in_runpm && - adev->asic_type >= CHIP_NAVI10 && - adev->asic_type <= CHIP_NAVI12)) { + adev->asic_type == CHIP_VEGA20))) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) { DRM_WARN("Failed to set MP1 state prepare for reload\n"); } } - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) DRM_ERROR("PSP load smu failed!\n"); @@ -2233,14 +2506,14 @@ int psp_load_fw_list(struct psp_context *psp, for (i = 0; i < ucode_count; ++i) { ucode = ucode_list[i]; psp_print_fw_hdr(psp, ucode); - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) return ret; } return ret; } -static int psp_np_fw_load(struct psp_context *psp) +static int psp_load_non_psp_fw(struct psp_context *psp) { int i, ret; struct amdgpu_firmware_info *ucode; @@ -2279,7 +2552,7 @@ static int psp_np_fw_load(struct psp_context *psp) psp_print_fw_hdr(psp, ucode); - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) return ret; @@ -2307,15 +2580,20 @@ static int psp_load_fw(struct amdgpu_device *adev) goto skip_memalloc; } - psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!psp->cmd) - return -ENOMEM; - - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); + if (amdgpu_sriov_vf(adev)) { + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + } else { + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + } + if (ret) goto failed; @@ -2347,7 +2625,7 @@ skip_memalloc: if (ret) goto failed; - ret = psp_np_fw_load(psp); + ret = psp_load_non_psp_fw(psp); if (ret) goto failed; @@ -2363,7 +2641,7 @@ skip_memalloc: return ret; } - if 
(psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, @@ -2434,9 +2712,8 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - int ret; - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { psp_ras_terminate(psp); psp_securedisplay_terminate(psp); psp_rap_terminate(psp); @@ -2445,11 +2722,6 @@ static int psp_hw_fini(void *handle) } psp_asd_unload(psp); - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); @@ -2461,9 +2733,6 @@ static int psp_hw_fini(void *handle) amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); - kfree(psp->cmd); - psp->cmd = NULL; - return 0; } @@ -2474,7 +2743,7 @@ static int psp_suspend(void *handle) struct psp_context *psp = &adev->psp; if (adev->gmc.xgmi.num_physical_nodes > 1 && - psp->xgmi_context.initialized == 1) { + psp->xgmi_context.context.initialized) { ret = psp_xgmi_terminate(psp); if (ret) { DRM_ERROR("Failed to terminate xgmi ta\n"); @@ -2482,7 +2751,7 @@ static int psp_suspend(void *handle) } } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_terminate(psp); if (ret) { DRM_ERROR("Failed to terminate ras ta\n"); @@ -2539,10 +2808,12 @@ static int psp_resume(void *handle) DRM_INFO("PSP is resuming...\n"); - ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); - if (ret) { - DRM_ERROR("Failed to process memory training!\n"); - return ret; + if (psp->mem_train_ctx.enable_mem_training) { + ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); + if (ret) { + DRM_ERROR("Failed to process memory training!\n"); + return ret; + } } mutex_lock(&adev->firmware.mutex); @@ -2551,7 +2822,7 @@ static int psp_resume(void *handle) if (ret) goto failed; - ret = psp_np_fw_load(psp); + ret = psp_load_non_psp_fw(psp); if (ret) goto failed; @@ -2562,7 +2833,7 @@ static int psp_resume(void *handle) } if (adev->gmc.xgmi.num_physical_nodes > 1) { - ret = psp_xgmi_initialize(psp); + ret = psp_xgmi_initialize(psp, false, true); /* Warning the XGMI seesion initialize failure * Instead of stop driver initialization */ @@ -2571,7 +2842,7 @@ static int psp_resume(void *handle) "XGMI: Failed to initialize XGMI session\n"); } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, @@ -2625,17 +2896,15 @@ int psp_gpu_reset(struct amdgpu_device *adev) int psp_rlc_autoload_start(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC; ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + + release_psp_cmd_buf(psp); + return ret; } @@ -2649,7 +2918,7 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, ucode.mc_addr = cmd_gpu_addr; ucode.ucode_size = cmd_size; - return psp_execute_np_fw_load(&adev->psp, &ucode); + return psp_execute_non_psp_fw_load(&adev->psp, &ucode); } int psp_ring_cmd_submit(struct psp_context *psp, @@ -2694,7 +2963,7 @@ int psp_ring_cmd_submit(struct psp_context *psp, write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); write_frame->fence_value = index; - amdgpu_asic_flush_hdp(adev, NULL); + 
amdgpu_device_flush_hdp(adev, NULL); /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; @@ -2725,10 +2994,10 @@ int psp_init_asd_microcode(struct psp_context *psp, goto out; asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; - adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version); - adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version); - adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes); - adev->psp.asd_start_addr = (uint8_t *)asd_hdr + + adev->psp.asd.fw_version = le32_to_cpu(asd_hdr->header.ucode_version); + adev->psp.asd.feature_version = le32_to_cpu(asd_hdr->sos.fw_version); + adev->psp.asd.size_bytes = le32_to_cpu(asd_hdr->header.ucode_size_bytes); + adev->psp.asd.start_addr = (uint8_t *)asd_hdr + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); return 0; out: @@ -2742,7 +3011,7 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[30]; + char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; @@ -2761,10 +3030,10 @@ int psp_init_toc_microcode(struct psp_context *psp, goto out; toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; - adev->psp.toc_fw_version = le32_to_cpu(toc_hdr->header.ucode_version); - adev->psp.toc_feature_version = le32_to_cpu(toc_hdr->ucode_feature_version); - adev->psp.toc_bin_size = le32_to_cpu(toc_hdr->header.ucode_size_bytes); - adev->psp.toc_start_addr = (uint8_t *)toc_hdr + + adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); + adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); + adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); + adev->psp.toc.start_addr = (uint8_t *)toc_hdr + le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); return 0; out: @@ -2774,6 +3043,128 @@ out: return err; } +static int parse_sos_bin_descriptor(struct psp_context *psp, + const struct psp_fw_bin_desc *desc, + const struct psp_firmware_header_v2_0 *sos_hdr) +{ + uint8_t *ucode_start_addr = NULL; + + if (!psp || !desc || !sos_hdr) + return -EINVAL; + + ucode_start_addr = (uint8_t *)sos_hdr + + le32_to_cpu(desc->offset_bytes) + + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); + + switch (desc->fw_type) { + case PSP_FW_TYPE_PSP_SOS: + psp->sos.fw_version = le32_to_cpu(desc->fw_version); + psp->sos.feature_version = le32_to_cpu(desc->fw_version); + psp->sos.size_bytes = le32_to_cpu(desc->size_bytes); + psp->sos.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_SYS_DRV: + psp->sys.fw_version = le32_to_cpu(desc->fw_version); + psp->sys.feature_version = le32_to_cpu(desc->fw_version); + psp->sys.size_bytes = le32_to_cpu(desc->size_bytes); + psp->sys.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_KDB: + psp->kdb.fw_version = le32_to_cpu(desc->fw_version); + psp->kdb.feature_version = le32_to_cpu(desc->fw_version); + psp->kdb.size_bytes = le32_to_cpu(desc->size_bytes); + psp->kdb.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_TOC: + psp->toc.fw_version = le32_to_cpu(desc->fw_version); + psp->toc.feature_version = le32_to_cpu(desc->fw_version); + psp->toc.size_bytes = le32_to_cpu(desc->size_bytes); + psp->toc.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_SPL: + psp->spl.fw_version = le32_to_cpu(desc->fw_version); + psp->spl.feature_version = 
le32_to_cpu(desc->fw_version); + psp->spl.size_bytes = le32_to_cpu(desc->size_bytes); + psp->spl.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_RL: + psp->rl.fw_version = le32_to_cpu(desc->fw_version); + psp->rl.feature_version = le32_to_cpu(desc->fw_version); + psp->rl.size_bytes = le32_to_cpu(desc->size_bytes); + psp->rl.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_SOC_DRV: + psp->soc_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->soc_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->soc_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->soc_drv.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_INTF_DRV: + psp->intf_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->intf_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->intf_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->intf_drv.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_DBG_DRV: + psp->dbg_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->dbg_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->dbg_drv.start_addr = ucode_start_addr; + break; + default: + dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); + break; + } + + return 0; +} + +static int psp_init_sos_base_fw(struct amdgpu_device *adev) +{ + const struct psp_firmware_header_v1_0 *sos_hdr; + const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; + uint8_t *ucode_array_start_addr; + + sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; + ucode_array_start_addr = (uint8_t *)sos_hdr + + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); + + if (adev->gmc.xgmi.connected_to_cpu || (adev->asic_type != CHIP_ALDEBARAN)) { + adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version); + adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version); + + adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr->sos.offset_bytes); + adev->psp.sys.start_addr = ucode_array_start_addr; + + adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes); + adev->psp.sos.start_addr = ucode_array_start_addr + + le32_to_cpu(sos_hdr->sos.offset_bytes); + adev->psp.xgmi_context.supports_extended_data = false; + } else { + /* Load alternate PSP SOS FW */ + sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; + + adev->psp.sos.fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); + adev->psp.sos.feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); + + adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes); + adev->psp.sys.start_addr = ucode_array_start_addr + + le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.offset_bytes); + + adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes); + adev->psp.sos.start_addr = ucode_array_start_addr + + le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes); + adev->psp.xgmi_context.supports_extended_data = true; + } + + if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) { + dev_warn(adev->dev, "PSP SOS FW not available"); + return -EINVAL; + } + + return 0; +} + int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) { @@ -2783,7 +3174,10 @@ int psp_init_sos_microcode(struct psp_context *psp, const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; + const struct psp_firmware_header_v2_0 *sos_hdr_v2_0; int err = 0; + uint8_t 
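parse_sos_bin_descriptor() and psp_init_sos_base_fw() together cover two layouts: the legacy v1.x headers with fixed per-image offset and size fields, and the v2.0 header that carries a count plus a table of typed descriptors. The v2.0 path reduces to slicing one image out of the blob per descriptor; a standalone sketch of that slicing, with illustrative types (the real descriptor fields are little-endian and go through le32_to_cpu()):

#include <stdint.h>

struct fw_desc {		/* illustrative, loosely after struct psp_fw_bin_desc */
	uint32_t fw_type;
	uint32_t fw_version;
	uint32_t offset_bytes;
	uint32_t size_bytes;
};

struct fw_image {
	const uint8_t *start;
	uint32_t size_bytes;
	uint32_t fw_version;
};

/* Slice one packed image out of the firmware blob described by desc. */
static void slice_image(const uint8_t *blob, uint32_t payload_offset,
			const struct fw_desc *desc, struct fw_image *out)
{
	out->start      = blob + payload_offset + desc->offset_bytes;
	out->size_bytes = desc->size_bytes;
	out->fw_version = desc->fw_version;
}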
*ucode_array_start_addr; + int fw_index = 0; if (!chip_name) { dev_err(adev->dev, "invalid chip name for sos microcode\n"); @@ -2800,47 +3194,62 @@ int psp_init_sos_microcode(struct psp_context *psp, goto out; sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; + ucode_array_start_addr = (uint8_t *)sos_hdr + + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); amdgpu_ucode_print_psp_hdr(&sos_hdr->header); switch (sos_hdr->header.header_version_major) { case 1: - adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); - adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); - adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); - adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos_offset_bytes); - adev->psp.sys_start_addr = (uint8_t *)sos_hdr + - le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); - adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr->sos_offset_bytes); + err = psp_init_sos_base_fw(adev); + if (err) + goto out; + if (sos_hdr->header.header_version_minor == 1) { sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data; - adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc_size_bytes); - adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr_v1_1->toc_offset_bytes); - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb_size_bytes); - adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes); + adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes); + adev->psp.toc.start_addr = (uint8_t *)adev->psp.sys.start_addr + + le32_to_cpu(sos_hdr_v1_1->toc.offset_bytes); + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes); + adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr + + le32_to_cpu(sos_hdr_v1_1->kdb.offset_bytes); } if (sos_hdr->header.header_version_minor == 2) { sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes); - adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes); + adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr + + le32_to_cpu(sos_hdr_v1_2->kdb.offset_bytes); } if (sos_hdr->header.header_version_minor == 3) { sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; - adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.toc_size_bytes); - adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr_v1_3->v1_1.toc_offset_bytes); - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_size_bytes); - adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_offset_bytes); - adev->psp.spl_bin_size = le32_to_cpu(sos_hdr_v1_3->spl_size_bytes); - adev->psp.spl_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr_v1_3->spl_offset_bytes); - adev->psp.rl_bin_size = le32_to_cpu(sos_hdr_v1_3->rl_size_bytes); - adev->psp.rl_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(sos_hdr_v1_3->rl_offset_bytes); + adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes); + adev->psp.toc.start_addr = ucode_array_start_addr + + le32_to_cpu(sos_hdr_v1_3->v1_1.toc.offset_bytes); + adev->psp.kdb.size_bytes = 
le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes); + adev->psp.kdb.start_addr = ucode_array_start_addr + + le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.offset_bytes); + adev->psp.spl.size_bytes = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes); + adev->psp.spl.start_addr = ucode_array_start_addr + + le32_to_cpu(sos_hdr_v1_3->spl.offset_bytes); + adev->psp.rl.size_bytes = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes); + adev->psp.rl.start_addr = ucode_array_start_addr + + le32_to_cpu(sos_hdr_v1_3->rl.offset_bytes); + } + break; + case 2: + sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data; + + if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { + dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); + err = -EINVAL; + goto out; + } + + for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) { + err = parse_sos_bin_descriptor(psp, + &sos_hdr_v2_0->psp_fw_bin[fw_index], + sos_hdr_v2_0); + if (err) + goto out; } break; default: @@ -2861,7 +3270,7 @@ out: } static int parse_ta_bin_descriptor(struct psp_context *psp, - const struct ta_fw_bin_desc *desc, + const struct psp_fw_bin_desc *desc, const struct ta_firmware_header_v2_0 *ta_hdr) { uint8_t *ucode_start_addr = NULL; @@ -2875,40 +3284,40 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); - psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); - psp->asd_start_addr = ucode_start_addr; + psp->asd.fw_version = le32_to_cpu(desc->fw_version); + psp->asd.feature_version = le32_to_cpu(desc->fw_version); + psp->asd.size_bytes = le32_to_cpu(desc->size_bytes); + psp->asd.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_XGMI: - psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_xgmi_start_addr = ucode_start_addr; + psp->xgmi.feature_version = le32_to_cpu(desc->fw_version); + psp->xgmi.size_bytes = le32_to_cpu(desc->size_bytes); + psp->xgmi.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_RAS: - psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_ras_start_addr = ucode_start_addr; + psp->ras.feature_version = le32_to_cpu(desc->fw_version); + psp->ras.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ras.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_HDCP: - psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_hdcp_start_addr = ucode_start_addr; + psp->hdcp.feature_version = le32_to_cpu(desc->fw_version); + psp->hdcp.size_bytes = le32_to_cpu(desc->size_bytes); + psp->hdcp.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_DTM: - psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_dtm_start_addr = ucode_start_addr; + psp->dtm.feature_version = le32_to_cpu(desc->fw_version); + psp->dtm.size_bytes = le32_to_cpu(desc->size_bytes); + psp->dtm.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_RAP: - psp->ta_rap_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_rap_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_rap_start_addr = ucode_start_addr; + psp->rap.feature_version = le32_to_cpu(desc->fw_version); + psp->rap.size_bytes = le32_to_cpu(desc->size_bytes); + 
psp->rap.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_SECUREDISPLAY: - psp->ta_securedisplay_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_securedisplay_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_securedisplay_start_addr = ucode_start_addr; + psp->securedisplay.feature_version = le32_to_cpu(desc->fw_version); + psp->securedisplay.size_bytes = le32_to_cpu(desc->size_bytes); + psp->securedisplay.start_addr = ucode_start_addr; break; default: dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type); @@ -2949,7 +3358,7 @@ int psp_init_ta_microcode(struct psp_context *psp, goto out; } - if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_TA_PACKAGING) { + if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { dev_err(adev->dev, "packed TA count exceeds maximum limit\n"); err = -EINVAL; goto out; @@ -3016,63 +3425,76 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - void *cpu_addr; - dma_addr_t dma_addr; - int ret; + int ret, idx; char fw_name[100]; const struct firmware *usbc_pd_fw; + struct amdgpu_bo *fw_buf_bo = NULL; + uint64_t fw_pri_mc_addr; + void *fw_pri_cpu_addr; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { DRM_INFO("PSP block is not ready yet."); return -EBUSY; } + if (!drm_dev_enter(ddev, &idx)) + return -ENODEV; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s", buf); ret = request_firmware(&usbc_pd_fw, fw_name, adev->dev); if (ret) goto fail; - /* We need contiguous physical mem to place the FW for psp to access */ - cpu_addr = dma_alloc_coherent(adev->dev, usbc_pd_fw->size, &dma_addr, GFP_KERNEL); - - ret = dma_mapping_error(adev->dev, dma_addr); + /* LFB address which is aligned to 1MB boundary per PSP request */ + ret = amdgpu_bo_create_kernel(adev, usbc_pd_fw->size, 0x100000, + AMDGPU_GEM_DOMAIN_VRAM, + &fw_buf_bo, + &fw_pri_mc_addr, + &fw_pri_cpu_addr); if (ret) goto rel_buf; - memcpy_toio(cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); - - /* - * x86 specific workaround. - * Without it the buffer is invisible in PSP. 
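The firmware copies in this path are now wrapped in drm_dev_enter()/drm_dev_exit(), which bail out early once the device has been unplugged so nothing is copied into memory that may no longer exist (the real helpers also hold an SRCU read lock across the section). In miniature, and with hypothetical names, the guard amounts to:

#include <stdatomic.h>
#include <stdbool.h>
#include <string.h>

static atomic_bool device_unplugged;	/* set by the hot-unplug path */

static bool dev_enter(void)
{
	return !atomic_load(&device_unplugged);
}

static void dev_exit(void)
{
	/* the real drm_dev_exit() drops the SRCU read side here */
}

/* Skip the copy entirely if the device is already gone. */
static void copy_fw_image(void *dst, size_t dst_size,
			  const void *src, size_t src_size)
{
	if (!dev_enter())
		return;

	memset(dst, 0, dst_size);
	memcpy(dst, src, src_size);
	dev_exit();
}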
- * - * TODO Remove once PSP starts snooping CPU cache - */ -#ifdef CONFIG_X86 - clflush_cache_range(cpu_addr, (usbc_pd_fw->size & ~(L1_CACHE_BYTES - 1))); -#endif + memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); mutex_lock(&adev->psp.mutex); - ret = psp_load_usbc_pd_fw(&adev->psp, dma_addr); + ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr); mutex_unlock(&adev->psp.mutex); + amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr); + rel_buf: - dma_free_coherent(adev->dev, usbc_pd_fw->size, cpu_addr, dma_addr); release_firmware(usbc_pd_fw); - fail: if (ret) { DRM_ERROR("Failed to load USBC PD FW, err = %d", ret); - return ret; + count = ret; } + drm_dev_exit(idx); return count; } +void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size) +{ + int idx; + + if (!drm_dev_enter(&psp->adev->ddev, &idx)) + return; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, start_addr, bin_size); + + drm_dev_exit(idx); +} + static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR, psp_usbc_pd_fw_sysfs_read, psp_usbc_pd_fw_sysfs_write); - +int is_psp_fw_valid(struct psp_bin_desc bin) +{ + return bin.size_bytes; +} const struct amd_ip_funcs psp_ip_funcs = { .name = "psp", @@ -3134,6 +3556,14 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block = .funcs = &psp_ip_funcs, }; +const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 11, + .minor = 0, + .rev = 8, + .funcs = &psp_ip_funcs, +}; + const struct amdgpu_ip_block_version psp_v12_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 60aa99a39a74..8ef2d28af92a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -48,11 +48,15 @@ struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; +struct psp_bin_desc; enum psp_bootloader_cmd { PSP_BL__LOAD_SYSDRV = 0x10000, PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, + PSP_BL__LOAD_SOCDRV = 0xB0000, + PSP_BL__LOAD_INTFDRV = 0xC0000, + PSP_BL__LOAD_DBGDRV = 0xD0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -93,6 +97,9 @@ struct psp_funcs int (*bootloader_load_kdb)(struct psp_context *psp); int (*bootloader_load_spl)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); + int (*bootloader_load_soc_drv)(struct psp_context *psp); + int (*bootloader_load_intf_drv)(struct psp_context *psp); + int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp, @@ -106,7 +113,7 @@ struct psp_funcs int (*mem_training)(struct psp_context *psp, uint32_t ops); uint32_t (*ring_get_wptr)(struct psp_context *psp); void (*ring_set_wptr)(struct psp_context *psp, uint32_t value); - int (*load_usbc_pd_fw)(struct psp_context *psp, dma_addr_t dma_addr); + int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver); }; @@ -116,6 +123,7 @@ struct psp_xgmi_node_info { uint8_t num_hops; uint8_t is_sharing_enabled; enum ta_xgmi_assigned_sdma_engine sdma_engine; + uint8_t num_links; }; struct psp_xgmi_topology_info { @@ -128,59 +136,32 @@ struct psp_asd_context { uint32_t session_id; }; -struct 
psp_xgmi_context { - uint8_t initialized; - uint32_t session_id; - struct amdgpu_bo *xgmi_shared_bo; - uint64_t xgmi_shared_mc_addr; - void *xgmi_shared_buf; - struct psp_xgmi_topology_info top_info; +struct ta_mem_context { + struct amdgpu_bo *shared_bo; + uint64_t shared_mc_addr; + void *shared_buf; }; -struct psp_ras_context { - /*ras fw*/ - bool ras_initialized; +struct ta_context { + bool initialized; uint32_t session_id; - struct amdgpu_bo *ras_shared_bo; - uint64_t ras_shared_mc_addr; - void *ras_shared_buf; - struct amdgpu_ras *ras; + struct ta_mem_context mem_context; }; -struct psp_hdcp_context { - bool hdcp_initialized; - uint32_t session_id; - struct amdgpu_bo *hdcp_shared_bo; - uint64_t hdcp_shared_mc_addr; - void *hdcp_shared_buf; - struct mutex mutex; -}; - -struct psp_dtm_context { - bool dtm_initialized; - uint32_t session_id; - struct amdgpu_bo *dtm_shared_bo; - uint64_t dtm_shared_mc_addr; - void *dtm_shared_buf; - struct mutex mutex; +struct ta_cp_context { + struct ta_context context; + struct mutex mutex; }; -struct psp_rap_context { - bool rap_initialized; - uint32_t session_id; - struct amdgpu_bo *rap_shared_bo; - uint64_t rap_shared_mc_addr; - void *rap_shared_buf; - struct mutex mutex; +struct psp_xgmi_context { + struct ta_context context; + struct psp_xgmi_topology_info top_info; + bool supports_extended_data; }; -struct psp_securedisplay_context { - bool securedisplay_initialized; - uint32_t session_id; - struct amdgpu_bo *securedisplay_shared_bo; - uint64_t securedisplay_shared_mc_addr; - void *securedisplay_shared_buf; - struct mutex mutex; +struct psp_ras_context { + struct ta_context context; + struct amdgpu_ras *ras; }; #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 @@ -225,6 +206,68 @@ struct psp_memory_training_context { enum psp_memory_training_init_flag init; u32 training_cnt; + bool enable_mem_training; +}; + +/** PSP runtime DB **/ +#define PSP_RUNTIME_DB_SIZE_IN_BYTES 0x10000 +#define PSP_RUNTIME_DB_OFFSET 0x100000 +#define PSP_RUNTIME_DB_COOKIE_ID 0x0ed5 +#define PSP_RUNTIME_DB_VER_1 0x0100 +#define PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT 0x40 + +enum psp_runtime_entry_type { + PSP_RUNTIME_ENTRY_TYPE_INVALID = 0x0, + PSP_RUNTIME_ENTRY_TYPE_TEST = 0x1, + PSP_RUNTIME_ENTRY_TYPE_MGPU_COMMON = 0x2, /* Common mGPU runtime data */ + PSP_RUNTIME_ENTRY_TYPE_MGPU_WAFL = 0x3, /* WAFL runtime data */ + PSP_RUNTIME_ENTRY_TYPE_MGPU_XGMI = 0x4, /* XGMI runtime data */ + PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG = 0x5, /* Boot Config runtime data */ +}; + +/* PSP runtime DB header */ +struct psp_runtime_data_header { + /* determine the existence of runtime db */ + uint16_t cookie; + /* version of runtime db */ + uint16_t version; +}; + +/* PSP runtime DB entry */ +struct psp_runtime_entry { + /* type of runtime db entry */ + uint32_t entry_type; + /* offset of entry in bytes */ + uint16_t offset; + /* size of entry in bytes */ + uint16_t size; +}; + +/* PSP runtime DB directory */ +struct psp_runtime_data_directory { + /* number of valid entries */ + uint16_t entry_count; + /* db entries*/ + struct psp_runtime_entry entry_list[PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT]; +}; + +/* PSP runtime DB boot config feature bitmask */ +enum psp_runtime_boot_cfg_feature { + BOOT_CFG_FEATURE_GECC = 0x1, + BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING = 0x2, +}; + +/* PSP runtime DB boot config entry */ +struct psp_runtime_boot_cfg_entry { + uint32_t boot_cfg_bitmask; + uint32_t reserved; +}; + +struct psp_bin_desc { + uint32_t fw_version; + uint32_t feature_version; + uint32_t size_bytes; + 
uint8_t *start_addr; }; struct psp_context @@ -242,36 +285,26 @@ struct psp_context /* sos firmware */ const struct firmware *sos_fw; - uint32_t sos_fw_version; - uint32_t sos_feature_version; - uint32_t sys_bin_size; - uint32_t sos_bin_size; - uint32_t toc_bin_size; - uint32_t kdb_bin_size; - uint32_t spl_bin_size; - uint32_t rl_bin_size; - uint8_t *sys_start_addr; - uint8_t *sos_start_addr; - uint8_t *toc_start_addr; - uint8_t *kdb_start_addr; - uint8_t *spl_start_addr; - uint8_t *rl_start_addr; + struct psp_bin_desc sys; + struct psp_bin_desc sos; + struct psp_bin_desc toc; + struct psp_bin_desc kdb; + struct psp_bin_desc spl; + struct psp_bin_desc rl; + struct psp_bin_desc soc_drv; + struct psp_bin_desc intf_drv; + struct psp_bin_desc dbg_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; uint64_t tmr_mc_addr; /* asd firmware */ - const struct firmware *asd_fw; - uint32_t asd_fw_version; - uint32_t asd_feature_version; - uint32_t asd_ucode_size; - uint8_t *asd_start_addr; + const struct firmware *asd_fw; + struct psp_bin_desc asd; /* toc firmware */ const struct firmware *toc_fw; - uint32_t toc_fw_version; - uint32_t toc_feature_version; /* fence buffer */ struct amdgpu_bo *fence_buf_bo; @@ -293,38 +326,24 @@ struct psp_context /* xgmi ta firmware and buffer */ const struct firmware *ta_fw; uint32_t ta_fw_version; - uint32_t ta_xgmi_ucode_version; - uint32_t ta_xgmi_ucode_size; - uint8_t *ta_xgmi_start_addr; - uint32_t ta_ras_ucode_version; - uint32_t ta_ras_ucode_size; - uint8_t *ta_ras_start_addr; - - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_ucode_size; - uint8_t *ta_hdcp_start_addr; - - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_ucode_size; - uint8_t *ta_dtm_start_addr; - - uint32_t ta_rap_ucode_version; - uint32_t ta_rap_ucode_size; - uint8_t *ta_rap_start_addr; - - uint32_t ta_securedisplay_ucode_version; - uint32_t ta_securedisplay_ucode_size; - uint8_t *ta_securedisplay_start_addr; + struct psp_bin_desc xgmi; + struct psp_bin_desc ras; + struct psp_bin_desc hdcp; + struct psp_bin_desc dtm; + struct psp_bin_desc rap; + struct psp_bin_desc securedisplay; struct psp_asd_context asd_context; struct psp_xgmi_context xgmi_context; - struct psp_ras_context ras; - struct psp_hdcp_context hdcp_context; - struct psp_dtm_context dtm_context; - struct psp_rap_context rap_context; - struct psp_securedisplay_context securedisplay_context; + struct psp_ras_context ras_context; + struct ta_cp_context hdcp_context; + struct ta_cp_context dtm_context; + struct ta_cp_context rap_context; + struct ta_cp_context securedisplay_context; struct mutex mutex; struct psp_memory_training_context mem_train_ctx; + + uint32_t boot_cfg_bitmask; }; struct amdgpu_psp_funcs { @@ -345,6 +364,12 @@ struct amdgpu_psp_funcs { ((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0) #define psp_bootloader_load_sysdrv(psp) \ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0) +#define psp_bootloader_load_soc_drv(psp) \ + ((psp)->funcs->bootloader_load_soc_drv ? (psp)->funcs->bootloader_load_soc_drv((psp)) : 0) +#define psp_bootloader_load_intf_drv(psp) \ + ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0) +#define psp_bootloader_load_dbg_drv(psp) \ + ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? 
(psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ @@ -357,9 +382,9 @@ struct amdgpu_psp_funcs { #define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp)) #define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value)) -#define psp_load_usbc_pd_fw(psp, dma_addr) \ +#define psp_load_usbc_pd_fw(psp, fw_pri_mc_addr) \ ((psp)->funcs->load_usbc_pd_fw ? \ - (psp)->funcs->load_usbc_pd_fw((psp), (dma_addr)) : -EINVAL) + (psp)->funcs->load_usbc_pd_fw((psp), (fw_pri_mc_addr)) : -EINVAL) #define psp_read_usbc_pd_fw(psp, fw_ver) \ ((psp)->funcs->read_usbc_pd_fw ? \ @@ -370,6 +395,7 @@ extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block; extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; @@ -380,14 +406,15 @@ int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, uint64_t cmd_gpu_addr, int cmd_size); -int psp_xgmi_initialize(struct psp_context *psp); +int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta); int psp_xgmi_terminate(struct psp_context *psp); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id); int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id); int psp_xgmi_get_topology_info(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology); + struct psp_xgmi_topology_info *topology, + bool get_extended_data); int psp_xgmi_set_topology_info(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology); @@ -424,4 +451,7 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, int psp_load_fw_list(struct psp_context *psp, struct amdgpu_firmware_info **ucode_list, int ucode_count); +void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); + +int is_psp_fw_valid(struct psp_bin_desc bin); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c index 51909bf8798c..12010c988c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c @@ -76,7 +76,7 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf, dev_info(adev->dev, "RAP L0 validate test success.\n"); } else { rap_shared_mem = (struct ta_rap_shared_memory *) - adev->psp.rap_context.rap_shared_buf; + adev->psp.rap_context.context.mem_context.shared_buf; rap_cmd_output = &(rap_shared_mem->rap_out_message.output); dev_info(adev->dev, "RAP test failed, the output is:\n"); @@ -119,7 +119,7 @@ void amdgpu_rap_debugfs_init(struct amdgpu_device *adev) #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; - if (!adev->psp.rap_context.rap_initialized) + if (!adev->psp.rap_context.context.initialized) return; debugfs_create_file("rap_test", S_IWUSR, minor->debugfs_root, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b0d2fc9454ca..96a8fd0ca1df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -27,12 +27,14 @@ #include <linux/uaccess.h> #include 
<linux/reboot.h> #include <linux/syscalls.h> +#include <linux/pm_runtime.h> #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" +#include "atom.h" static const char *RAS_FS_NAME = "ras"; @@ -62,15 +64,14 @@ const char *ras_block_string[] = { }; #define ras_err_str(i) (ras_error_string[ffs(i)]) -#define ras_block_str(i) (ras_block_string[i]) #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) /* inject address is 52 bits */ #define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) -/* typical ECC bad page rate(1 bad page per 100MB VRAM) */ -#define RAS_BAD_PAGE_RATE (100 * 1024 * 1024ULL) +/* typical ECC bad page rate is 1 bad page per 100MB VRAM */ +#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, @@ -320,11 +321,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * "disable" requires only the block. * "enable" requires the block and error type. * "inject" requires the block, error type, address, and value. + * * The block is one of: umc, sdma, gfx, etc. * see ras_block_string[] for details + * * The error type is one of: ue, ce, where, * ue is multi-uncorrectable * ce is single-correctable + * * The sub-block is a the sub-block index, pass 0 if there is no sub-block. * The address and value are hexadecimal numbers, leading 0x is optional. * @@ -350,8 +354,9 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * to see which blocks support RAS on a particular asic. * */ -static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) +static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, + const char __user *buf, + size_t size, loff_t *pos) { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; struct ras_debug_if data; @@ -365,7 +370,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); if (ret) - return -EINVAL; + return ret; if (data.op == 3) { ret = amdgpu_reserve_page_direct(adev, data.inject.address); @@ -398,9 +403,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * /* umc ce/ue error injection for a bad page is not allowed */ if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) && amdgpu_ras_check_bad_page(adev, data.inject.address)) { - dev_warn(adev->dev, "RAS WARN: 0x%llx has been marked " - "as bad before error injection!\n", - data.inject.address); + dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has " + "already been marked as bad!\n", + data.inject.address); break; } @@ -434,21 +439,24 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * * will reset EEPROM table to 0 entries. * */ -static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) +static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, + const char __user *buf, + size_t size, loff_t *pos) { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; int ret; ret = amdgpu_ras_eeprom_reset_table( - &(amdgpu_ras_get_context(adev)->eeprom_control)); + &(amdgpu_ras_get_context(adev)->eeprom_control)); - if (ret == 1) { + if (!ret) { + /* Something was written to EEPROM. 
+ */ amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS; return size; } else { - return -EIO; + return ret; } } @@ -521,7 +529,7 @@ static inline void put_obj(struct ras_manager *obj) if (obj && (--obj->use == 0)) list_del(&obj->node); if (obj && (obj->use < 0)) - DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name); + DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", ras_block_str(obj->head.block)); } /* make one obj and return it. */ @@ -531,7 +539,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return NULL; if (head->block >= AMDGPU_RAS_BLOCK_COUNT) @@ -558,7 +566,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, struct ras_manager *obj; int i; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return NULL; if (head) { @@ -585,36 +593,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, } /* obj end */ -static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev, - const char* invoke_type, - const char* block_name, - enum ta_ras_status ret) -{ - switch (ret) { - case TA_RAS_STATUS__SUCCESS: - return; - case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE: - dev_warn(adev->dev, - "RAS WARN: %s %s currently unavailable\n", - invoke_type, - block_name); - break; - default: - dev_err(adev->dev, - "RAS ERROR: %s %s error failed ret 0x%X\n", - invoke_type, - block_name, - ret); - } -} - /* feature ctl begin */ static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, - struct ras_common_if *head) + struct ras_common_if *head) { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - - return con->hw_supported & BIT(head->block); + return adev->ras_hw_enabled & BIT(head->block); } static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, @@ -658,11 +641,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, con->features |= BIT(head->block); } else { if (obj && amdgpu_ras_is_feature_enabled(adev, head)) { - /* skip clean gfx ras context feature for VEGA20 Gaming. - * will clean later - */ - if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX))) - con->features &= ~BIT(head->block); + con->features &= ~BIT(head->block); put_obj(obj); } } @@ -708,15 +687,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { - amdgpu_ras_parse_status_code(adev, - enable ? "enable":"disable", - ras_block_str(head->block), - (enum ta_ras_status)ret); - if (ret == TA_RAS_STATUS__RESET_NEEDED) - ret = -EAGAIN; - else - ret = -EINVAL; - + dev_err(adev->dev, "ras %s %s failed %d\n", + enable ? "enable":"disable", + ras_block_str(head->block), + ret); goto out; } } @@ -770,6 +744,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, con->features |= BIT(head->block); ret = amdgpu_ras_feature_enable(adev, head, 0); + + /* clean gfx block ras features flag */ + if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX) + con->features &= ~BIT(head->block); } } else ret = amdgpu_ras_feature_enable(adev, head, enable); @@ -814,7 +792,6 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, .type = default_ras_type, .sub_block_index = 0, }; - strcpy(head.name, ras_block_str(i)); if (bypass) { /* * bypass psp. vbios enable ras for us. 
@@ -834,7 +811,7 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, /* query/inject/cure begin */ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) + struct ras_query_if *info) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; @@ -890,6 +867,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, adev->gmc.xgmi.ras_funcs->query_ras_error_count) adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data); break; + case AMDGPU_RAS_BLOCK__HDP: + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->query_ras_error_count) + adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data); + break; default: break; } @@ -901,17 +883,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, info->ce_count = obj->err_data.ce_count; if (err_data.ce_count) { - dev_info(adev->dev, "%ld correctable hardware errors " + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + dev_info(adev->dev, "socket: %d, die: %d " + "%ld correctable hardware errors " "detected in %s block, no user " "action is needed.\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), obj->err_data.ce_count, ras_block_str(info->head.block)); + } else { + dev_info(adev->dev, "%ld correctable hardware errors " + "detected in %s block, no user " + "action is needed.\n", + obj->err_data.ce_count, + ras_block_str(info->head.block)); + } } if (err_data.ue_count) { - dev_info(adev->dev, "%ld uncorrectable hardware errors " + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + dev_info(adev->dev, "socket: %d, die: %d " + "%ld uncorrectable hardware errors " "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), obj->err_data.ue_count, ras_block_str(info->head.block)); + } else { + dev_info(adev->dev, "%ld uncorrectable hardware errors " + "detected in %s block\n", + obj->err_data.ue_count, + ras_block_str(info->head.block)); + } } return 0; @@ -937,11 +944,20 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, if (adev->mmhub.ras_funcs && adev->mmhub.ras_funcs->reset_ras_error_count) adev->mmhub.ras_funcs->reset_ras_error_count(adev); + + if (adev->mmhub.ras_funcs && + adev->mmhub.ras_funcs->reset_ras_error_status) + adev->mmhub.ras_funcs->reset_ras_error_status(adev); break; case AMDGPU_RAS_BLOCK__SDMA: if (adev->sdma.funcs->reset_ras_error_count) adev->sdma.funcs->reset_ras_error_count(adev); break; + case AMDGPU_RAS_BLOCK__HDP: + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->reset_ras_error_count) + adev->hdp.ras_funcs->reset_ras_error_count(adev); + break; default: break; } @@ -1022,38 +1038,63 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, ret = -EINVAL; } - amdgpu_ras_parse_status_code(adev, - "inject", - ras_block_str(info->head.block), - (enum ta_ras_status)ret); + if (ret) + dev_err(adev->dev, "ras inject %s failed %d\n", + ras_block_str(info->head.block), ret); return ret; } -/* get the total error counts on all IPs */ -unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, - bool is_ce) +/** + * amdgpu_ras_query_error_count -- Get error counts of all IPs + * adev: pointer to AMD GPU device + * ce_count: pointer to an integer to be set to the count of correctible errors. + * ue_count: pointer to an integer to be set to the count of uncorrectible + * errors. 
+ * + * If set, @ce_count or @ue_count, count and return the corresponding + * error counts in those integer pointers. Return 0 if the device + * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS. + */ +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - struct ras_err_data data = {0, 0}; + unsigned long ce, ue; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) + return -EOPNOTSUPP; + + /* Don't count since no reporting. + */ + if (!ce_count && !ue_count) return 0; + ce = 0; + ue = 0; list_for_each_entry(obj, &con->head, node) { struct ras_query_if info = { .head = obj->head, }; + int res; - if (amdgpu_ras_query_error_status(adev, &info)) - return 0; + res = amdgpu_ras_query_error_status(adev, &info); + if (res) + return res; - data.ce_count += info.ce_count; - data.ue_count += info.ue_count; + ce += info.ce_count; + ue += info.ue_count; } - return is_ce ? data.ce_count : data.ue_count; + if (ce_count) + *ce_count = ce; + + if (ue_count) + *ue_count = ue; + + return 0; } /* query/inject/cure end */ @@ -1265,8 +1306,8 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct dentry *dir; - struct drm_minor *minor = adev_to_drm(adev)->primary; + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *dir; dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root); debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev, @@ -1275,6 +1316,14 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device * &amdgpu_ras_debugfs_eeprom_ops); debugfs_create_u32("bad_page_cnt_threshold", 0444, dir, &con->bad_page_cnt_threshold); + debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled); + debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled); + debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev, + &amdgpu_ras_debugfs_eeprom_size_ops); + con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table", + S_IRUGO, dir, adev, + &amdgpu_ras_debugfs_eeprom_table_ops); + amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control); /* * After one uncorrectable error happens, usually GPU recovery will @@ -1561,7 +1610,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; list_for_each_entry(obj, &con->head, node) { @@ -1611,7 +1660,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; list_for_each_entry(obj, &con->head, node) { @@ -1792,13 +1841,12 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) control = &con->eeprom_control; data = con->eh_data; - save_count = data->count - control->num_recs; + save_count = data->count - control->ras_num_recs; /* only new entries are saved */ if (save_count > 0) { - if (amdgpu_ras_eeprom_process_recods(control, - &data->bps[control->num_recs], - true, - save_count)) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[control->ras_num_recs], + save_count)) { dev_err(adev->dev, "Failed to save EEPROM 
table data!"); return -EIO; } @@ -1816,28 +1864,24 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) { struct amdgpu_ras_eeprom_control *control = - &adev->psp.ras.ras->eeprom_control; - struct eeprom_table_record *bps = NULL; - int ret = 0; + &adev->psp.ras_context.ras->eeprom_control; + struct eeprom_table_record *bps; + int ret; /* no bad page record, skip eeprom access */ - if (!control->num_recs || (amdgpu_bad_page_threshold == 0)) - return ret; + if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0) + return 0; - bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL); + bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL); if (!bps) return -ENOMEM; - if (amdgpu_ras_eeprom_process_recods(control, bps, false, - control->num_recs)) { + ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs); + if (ret) dev_err(adev->dev, "Failed to load EEPROM table records!"); - ret = -EIO; - goto out; - } - - ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs); + else + ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs); -out: kfree(bps); return ret; } @@ -1877,11 +1921,9 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, } static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, - uint32_t max_length) + uint32_t max_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - int tmp_threshold = amdgpu_bad_page_threshold; - u64 val; /* * Justification of value bad_page_cnt_threshold in ras structure @@ -1902,18 +1944,15 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, * take no effect. */ - if (tmp_threshold < -1) - tmp_threshold = -1; - else if (tmp_threshold > max_length) - tmp_threshold = max_length; + if (amdgpu_bad_page_threshold < 0) { + u64 val = adev->gmc.mc_vram_size; - if (tmp_threshold == -1) { - val = adev->gmc.mc_vram_size; - do_div(val, RAS_BAD_PAGE_RATE); + do_div(val, RAS_BAD_PAGE_COVER); con->bad_page_cnt_threshold = min(lower_32_bits(val), - max_length); + max_count); } else { - con->bad_page_cnt_threshold = tmp_threshold; + con->bad_page_cnt_threshold = min_t(int, max_count, + amdgpu_bad_page_threshold); } } @@ -1921,15 +1960,24 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data **data; - uint32_t max_eeprom_records_len = 0; + u32 max_eeprom_records_count = 0; bool exc_err_limit = false; int ret; - if (adev->ras_features && con) - data = &con->eh_data; - else + if (!con) return 0; + /* Allow access to RAS EEPROM via debugfs, when the ASIC + * supports RAS and debugfs is enabled, but when + * adev->ras_enabled is unset, i.e. when "ras_enable" + * module parameter is set to 0. 
+ */ + con->adev = adev; + + if (!adev->ras_enabled) + return 0; + + data = &con->eh_data; *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); if (!*data) { ret = -ENOMEM; @@ -1939,10 +1987,9 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); - con->adev = adev; - max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length(); - amdgpu_ras_validate_threshold(adev, max_eeprom_records_len); + max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(); + amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); /* Todo: During test the SMU might fail to read the eeprom through I2C * when the GPU is pending on XGMI reset during probe time @@ -1958,10 +2005,13 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) if (exc_err_limit || ret) goto free; - if (con->eeprom_control.num_recs) { + if (con->eeprom_control.ras_num_recs) { ret = amdgpu_ras_load_bad_pages(adev); if (ret) goto free; + + if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) + adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); } return 0; @@ -1971,7 +2021,7 @@ free: kfree(*data); con->eh_data = NULL; out: - dev_warn(adev->dev, "Failed to initialize ras recovery!\n"); + dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret); /* * Except error threshold exceeding case, other failure cases in this @@ -2029,6 +2079,25 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) } /* + * this is workaround for vega20 workstation sku, + * force enable gfx ras, ignore vbios gfx ras flag + * due to GC EDC can not write + */ +static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + + if (!ctx) + return; + + if (strnstr(ctx->vbios_version, "D16406", + sizeof(ctx->vbios_version)) || + strnstr(ctx->vbios_version, "D36002", + sizeof(ctx->vbios_version))) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); +} + +/* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and * forbid some ras operations from IP. @@ -2037,11 +2106,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) * we have to initialize ras as normal. but need check if operation is * allowed or not in each function. 
*/ -static void amdgpu_ras_check_supported(struct amdgpu_device *adev, - uint32_t *hw_supported, uint32_t *supported) +static void amdgpu_ras_check_supported(struct amdgpu_device *adev) { - *hw_supported = 0; - *supported = 0; + adev->ras_hw_enabled = adev->ras_enabled = 0; if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || !amdgpu_ras_asic_supported(adev)) @@ -2050,33 +2117,59 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); - *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); } else { dev_info(adev->dev, "MEM ECC is not presented.\n"); } if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { dev_info(adev->dev, "SRAM ECC is active.\n"); - *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } } else { /* driver only manages a few IP blocks RAS feature * when GPU is connected cpu through XGMI */ - *hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX | - 1 << AMDGPU_RAS_BLOCK__SDMA | - 1 << AMDGPU_RAS_BLOCK__MMHUB); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__MMHUB); } + amdgpu_ras_get_quirks(adev); + /* hw_supported needs to be aligned with RAS block mask. */ - *hw_supported &= AMDGPU_RAS_BLOCK_MASK; + adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; + + adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : + adev->ras_hw_enabled & amdgpu_ras_mask; +} + +static void amdgpu_ras_counte_dw(struct work_struct *work) +{ + struct amdgpu_ras *con = container_of(work, struct amdgpu_ras, + ras_counte_delay_work.work); + struct amdgpu_device *adev = con->adev; + struct drm_device *dev = adev_to_drm(adev); + unsigned long ce_count, ue_count; + int res; + + res = pm_runtime_get_sync(dev->dev); + if (res < 0) + goto Out; + + /* Cache new values. + */ + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } - *supported = amdgpu_ras_enable == 0 ? - 0 : *hw_supported & amdgpu_ras_mask; - adev->ras_features = *supported; + pm_runtime_mark_last_busy(dev->dev); +Out: + pm_runtime_put_autosuspend(dev->dev); } int amdgpu_ras_init(struct amdgpu_device *adev) @@ -2093,17 +2186,22 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (!con) return -ENOMEM; + con->adev = adev; + INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw); + atomic_set(&con->ras_ce_count, 0); + atomic_set(&con->ras_ue_count, 0); + con->objs = (struct ras_manager *)(con + 1); amdgpu_ras_set_context(adev, con); - amdgpu_ras_check_supported(adev, &con->hw_supported, - &con->supported); - if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) { + amdgpu_ras_check_supported(adev); + + if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) { /* set gfx block ras context feature for VEGA20 Gaming * send ras disable cmd to ras ta during ras late init. 
*/ - if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) { + if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) { con->features |= BIT(AMDGPU_RAS_BLOCK__GFX); return 0; @@ -2153,8 +2251,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } dev_info(adev->dev, "RAS INFO: ras initialized successfully, " - "hardware ability[%x] ras_mask[%x]\n", - con->hw_supported, con->supported); + "hardware ability[%x] ras_mask[%x]\n", + adev->ras_hw_enabled, adev->ras_enabled); + return 0; release_con: amdgpu_ras_set_context(adev, NULL); @@ -2163,7 +2262,7 @@ release_con: return r; } -static int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev) +int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev) { if (adev->gmc.xgmi.connected_to_cpu) return 1; @@ -2195,6 +2294,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, struct ras_fs_if *fs_info, struct ras_ih_if *ih_info) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + unsigned long ue_count, ce_count; int r; /* disable RAS feature per IP block if it is not supported */ @@ -2235,6 +2336,13 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, if (r) goto sysfs; + /* Those are the cached values at init. + */ + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } + return 0; cleanup: amdgpu_ras_sysfs_remove(adev, ras_block); @@ -2268,7 +2376,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; - if (!adev->ras_features || !con) { + if (!adev->ras_enabled || !con) { /* clean ras context for VEGA20 Gaming after send ras disable cmd */ amdgpu_release_ras_context(adev); @@ -2314,7 +2422,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; amdgpu_ras_disable_all_features(adev, 0); @@ -2328,9 +2436,10 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; + /* Need disable ras on all IPs here before ip [hw/sw]fini */ amdgpu_ras_disable_all_features(adev, 0); amdgpu_ras_recovery_fini(adev); @@ -2341,7 +2450,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; amdgpu_ras_fs_fini(adev); @@ -2352,6 +2461,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) if (con->features) amdgpu_ras_disable_all_features(adev, 1); + cancel_delayed_work_sync(&con->ras_counte_delay_work); + amdgpu_ras_set_context(adev, NULL); kfree(con); @@ -2360,10 +2471,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { - uint32_t hw_supported, supported; - - amdgpu_ras_check_supported(adev, &hw_supported, &supported); - if (!hw_supported) + amdgpu_ras_check_supported(adev); + if (!adev->ras_hw_enabled) return; if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { @@ -2392,7 +2501,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev) if (!con) return; - if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) { + if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) { con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX); 
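A minimal sketch of a caller of the reworked amdgpu_ras_query_error_count(), which now reports both totals through out-pointers and returns -EOPNOTSUPP when RAS is not enabled, instead of selecting one counter with a bool argument. The helper name is hypothetical and not part of this patch; it assumes only a valid amdgpu_device pointer and mirrors what amdgpu_ras_counte_dw() and amdgpu_ras_late_init() above do before caching the counts:

static void example_log_ras_totals(struct amdgpu_device *adev)
{
	unsigned long ce_count, ue_count;

	/* Fails with -EOPNOTSUPP when RAS is unsupported or disabled,
	 * or with the error of a per-block query; both counts are
	 * valid only on a 0 return.
	 */
	if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count))
		return;

	dev_info(adev->dev, "RAS totals: %lu correctable, %lu uncorrectable\n",
		 ce_count, ue_count);
}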
amdgpu_ras_set_context(adev, NULL); kfree(con); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 60df268a0c66..eae604fd90b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -49,10 +49,14 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MP0, AMDGPU_RAS_BLOCK__MP1, AMDGPU_RAS_BLOCK__FUSE, + AMDGPU_RAS_BLOCK__MPIO, AMDGPU_RAS_BLOCK__LAST }; +extern const char *ras_block_string[]; + +#define ras_block_str(i) (ras_block_string[i]) #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) @@ -306,21 +310,18 @@ struct ras_common_if { enum amdgpu_ras_block block; enum amdgpu_ras_error_type type; uint32_t sub_block_index; - /* block name */ char name[32]; }; struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ - uint32_t hw_supported; - /* for IP to check its ras ability. */ - uint32_t supported; uint32_t features; struct list_head head; /* sysfs */ struct device_attribute features_attr; struct bin_attribute badpages_attr; + struct dentry *de_ras_eeprom_table; /* block array */ struct ras_manager *objs; @@ -343,6 +344,11 @@ struct amdgpu_ras { /* disable ras error count harvest in recovery */ bool disable_ras_err_cnt_harvest; + + /* RAS count errors delayed work */ + struct delayed_work ras_counte_delay_work; + atomic_t ras_ue_count; + atomic_t ras_ce_count; }; struct ras_fs_data { @@ -415,7 +421,7 @@ struct ras_badpage { /* interfaces for IP */ struct ras_fs_if { struct ras_common_if head; - char sysfs_name[32]; + const char* sysfs_name; char debugfs_name[32]; }; @@ -467,8 +473,8 @@ struct ras_debug_if { * 8: feature disable */ -#define amdgpu_ras_get_context(adev) ((adev)->psp.ras.ras) -#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras.ras = (ras_con)) +#define amdgpu_ras_get_context(adev) ((adev)->psp.ras_context.ras) +#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = (ras_con)) /* check if ras is supported on block, say, sdma, gfx */ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, @@ -478,7 +484,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, if (block >= AMDGPU_RAS_BLOCK_COUNT) return 0; - return ras && (ras->supported & (1 << block)); + return ras && (adev->ras_enabled & (1 << block)); } int amdgpu_ras_recovery_init(struct amdgpu_device *adev); @@ -488,8 +494,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); -unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, - bool is_ce); +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count); /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, @@ -628,4 +635,7 @@ void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready); bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev); void amdgpu_release_ras_context(struct amdgpu_device *adev); + +int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index f40c871da0c6..dc44c946a244 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -26,90 +26,128 @@ #include "amdgpu_ras.h" 
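The amdgpu_ras.h hunk above exports ras_block_string[] together with the ras_block_str() accessor and reworks amdgpu_ras_is_supported() to test adev->ras_enabled instead of the removed ras->supported field. A short illustrative sketch, not part of this patch, of how an IP block could combine the two (assuming a valid adev):

	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
		dev_info(adev->dev, "RAS enabled for the %s block\n",
			 ras_block_str(AMDGPU_RAS_BLOCK__UMC));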
#include <linux/bits.h> #include "atom.h" +#include "amdgpu_eeprom.h" +#include "amdgpu_atomfirmware.h" +#include <linux/debugfs.h> +#include <linux/uaccess.h> -#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 -#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID 0xA0 -#define EEPROM_I2C_TARGET_ADDR_ALDEBARAN 0xA0 +#define EEPROM_I2C_MADDR_VEGA20 0x0 +#define EEPROM_I2C_MADDR_ARCTURUS 0x40000 +#define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 +#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 +#define EEPROM_I2C_MADDR_ALDEBARAN 0x0 /* * The 2 macros bellow represent the actual size in bytes that * those entities occupy in the EEPROM memory. - * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which + * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which * uses uint64 to store 6b fields such as retired_page. */ -#define EEPROM_TABLE_HEADER_SIZE 20 -#define EEPROM_TABLE_RECORD_SIZE 24 - -#define EEPROM_ADDRESS_SIZE 0x2 +#define RAS_TABLE_HEADER_SIZE 20 +#define RAS_TABLE_RECORD_SIZE 24 /* Table hdr is 'AMDR' */ -#define EEPROM_TABLE_HDR_VAL 0x414d4452 -#define EEPROM_TABLE_VER 0x00010000 +#define RAS_TABLE_HDR_VAL 0x414d4452 +#define RAS_TABLE_VER 0x00010000 /* Bad GPU tag ‘BADG’ */ -#define EEPROM_TABLE_HDR_BAD 0x42414447 +#define RAS_TABLE_HDR_BAD 0x42414447 + +/* Assume 2-Mbit size EEPROM and take up the whole space. */ +#define RAS_TBL_SIZE_BYTES (256 * 1024) +#define RAS_TABLE_START 0 +#define RAS_HDR_START RAS_TABLE_START +#define RAS_RECORD_START (RAS_HDR_START + RAS_TABLE_HEADER_SIZE) +#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \ + / RAS_TABLE_RECORD_SIZE) + +/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM + * offset off of RAS_TABLE_START. That is, this is something you can + * add to control->i2c_address, and then tell I2C layer to read + * from/write to there. _N is the so called absolute index, + * because it starts right after the table header. + */ +#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \ + (_N) * RAS_TABLE_RECORD_SIZE) + +#define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \ + (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE) + +/* Given a 0-based relative record index, 0, 1, 2, ..., etc., off + * of "fri", return the absolute record index off of the end of + * the table header. 
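 * Worked example (editorial addition, not part of this patch): assuming
 * a 2-Mbit part so that ras_max_record_count == RAS_MAX_RECORD_COUNT
 * (i.e. (256 * 1024 - 20) / 24 == 10921) and ras_record_offset ==
 * RAS_RECORD_START (20), a first-record index ras_fri of 10919 and a
 * relative index of 3 give the absolute index (3 + 10919) % 10921 == 1,
 * i.e. the access wraps past the end of the table; RAS_INDEX_TO_OFFSET
 * then maps that to byte offset 20 + 1 * 24 == 44 off of
 * RAS_TABLE_START, which is added to control->i2c_address for the
 * actual EEPROM access.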
+ */ +#define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \ + (_C)->ras_max_record_count) -/* Assume 2 Mbit size */ -#define EEPROM_SIZE_BYTES 256000 -#define EEPROM_PAGE__SIZE_BYTES 256 -#define EEPROM_HDR_START 0 -#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) -#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) -#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) +#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \ + RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE) #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { - if ((adev->asic_type == CHIP_VEGA20) || - (adev->asic_type == CHIP_ARCTURUS) || - (adev->asic_type == CHIP_SIENNA_CICHLID) || - (adev->asic_type == CHIP_ALDEBARAN)) - return true; - - return false; + return adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS || + adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_ALDEBARAN; } static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, - uint16_t *i2c_addr) + struct amdgpu_ras_eeprom_control *control) { struct atom_context *atom_ctx = adev->mode_info.atom_context; - if (!i2c_addr || !atom_ctx) + if (!control || !atom_ctx) return false; if (strnstr(atom_ctx->vbios_version, "D342", sizeof(atom_ctx->vbios_version))) - *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342; + control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS_D342; else - *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS; + control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS; return true; } static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, - uint16_t *i2c_addr) + struct amdgpu_ras_eeprom_control *control) { - if (!i2c_addr) + u8 i2c_addr; + + if (!control) return false; + if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) { + /* The address given by VBIOS is an 8-bit, wire-format + * address, i.e. the most significant byte. + * + * Normalize it to a 19-bit EEPROM address. Remove the + * device type identifier and make it a 7-bit address; + * then make it a 19-bit EEPROM address. See top of + * amdgpu_eeprom.c. 
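 * Worked example (editorial addition, not part of this patch): a
 * wire-format address of 0xA0 gives (0xA0 & 0x0F) >> 1 == 0x0, hence a
 * 19-bit base of 0x00000, matching EEPROM_I2C_MADDR_VEGA20 above, while
 * 0xA8 gives (0xA8 & 0x0F) >> 1 == 0x4, hence 0x40000, matching
 * EEPROM_I2C_MADDR_ARCTURUS.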
+ */ + i2c_addr = (i2c_addr & 0x0F) >> 1; + control->i2c_address = ((u32) i2c_addr) << 16; + + return true; + } + switch (adev->asic_type) { case CHIP_VEGA20: - *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20; + control->i2c_address = EEPROM_I2C_MADDR_VEGA20; break; case CHIP_ARCTURUS: - return __get_eeprom_i2c_addr_arct(adev, i2c_addr); + return __get_eeprom_i2c_addr_arct(adev, control); case CHIP_SIENNA_CICHLID: - *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID; + control->i2c_address = EEPROM_I2C_MADDR_SIENNA_CICHLID; break; case CHIP_ALDEBARAN: - *i2c_addr = EEPROM_I2C_TARGET_ADDR_ALDEBARAN; + control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN; break; default: @@ -119,10 +157,11 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; } -static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, - unsigned char *buff) +static void +__encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buf) { - uint32_t *pp = (uint32_t *) buff; + u32 *pp = (uint32_t *)buf; pp[0] = cpu_to_le32(hdr->header); pp[1] = cpu_to_le32(hdr->version); @@ -131,10 +170,11 @@ static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header pp[4] = cpu_to_le32(hdr->checksum); } -static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, - unsigned char *buff) +static void +__decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buf) { - uint32_t *pp = (uint32_t *)buff; + u32 *pp = (uint32_t *)buf; hdr->header = le32_to_cpu(pp[0]); hdr->version = le32_to_cpu(pp[1]); @@ -143,303 +183,168 @@ static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_heade hdr->checksum = le32_to_cpu(pp[4]); } -static int __update_table_header(struct amdgpu_ras_eeprom_control *control, - unsigned char *buff) +static int __write_table_header(struct amdgpu_ras_eeprom_control *control) { - int ret = 0; + u8 buf[RAS_TABLE_HEADER_SIZE]; struct amdgpu_device *adev = to_amdgpu_device(control); - struct i2c_msg msg = { - .addr = 0, - .flags = 0, - .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, - .buf = buff, - }; - + int res; - *(uint16_t *)buff = EEPROM_HDR_START; - __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); - - msg.addr = control->i2c_address; + memset(buf, 0, sizeof(buf)); + __encode_table_header_to_buf(&control->tbl_hdr, buf); /* i2c may be unstable in gpu reset */ down_read(&adev->reset_sem); - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); + res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + control->i2c_address + + control->ras_header_offset, + buf, RAS_TABLE_HEADER_SIZE); up_read(&adev->reset_sem); - if (ret < 1) - DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); - - return ret; -} - -static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) -{ - int i; - uint32_t tbl_sum = 0; - - /* Header checksum, skip checksum field in the calculation */ - for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) - tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); - - return tbl_sum; -} - -static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, - int num) -{ - int i, j; - uint32_t tbl_sum = 0; - - /* Records checksum */ - for (i = 0; i < num; i++) { - struct eeprom_table_record *record = &records[i]; - - for (j = 0; j < sizeof(*record); j++) { - tbl_sum += *(((unsigned char *)record) + j); - } + if (res < 0) { + DRM_ERROR("Failed to write EEPROM table 
header:%d", res); + } else if (res < RAS_TABLE_HEADER_SIZE) { + DRM_ERROR("Short write:%d out of %d\n", + res, RAS_TABLE_HEADER_SIZE); + res = -EIO; + } else { + res = 0; } - return tbl_sum; + return res; } -static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num) +static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control) { - return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); -} + int ii; + u8 *pp, csum; + size_t sz; -/* Checksum = 256 -((sum of all table entries) mod 256) */ -static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num, - uint32_t old_hdr_byte_sum) -{ - /* - * This will update the table sum with new records. - * - * TODO: What happens when the EEPROM table is to be wrapped around - * and old records from start will get overridden. - */ - - /* need to recalculate updated header byte sum */ - control->tbl_byte_sum -= old_hdr_byte_sum; - control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); - - control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); -} - -/* table sum mod 256 + checksum must equals 256 */ -static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num) -{ - control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); - - if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { - DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); - return false; - } + /* Header checksum, skip checksum field in the calculation */ + sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); + pp = (u8 *) &control->tbl_hdr; + csum = 0; + for (ii = 0; ii < sz; ii++, pp++) + csum += *pp; - return true; + return csum; } static int amdgpu_ras_eeprom_correct_header_tag( - struct amdgpu_ras_eeprom_control *control, - uint32_t header) + struct amdgpu_ras_eeprom_control *control, + uint32_t header) { - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE]; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - int ret = 0; - - memset(buff, 0, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE); - - mutex_lock(&control->tbl_mutex); + u8 *hh; + int res; + u8 csum; + + csum = -hdr->checksum; + + hh = (void *) &hdr->header; + csum -= (hh[0] + hh[1] + hh[2] + hh[3]); + hh = (void *) &header; + csum += hh[0] + hh[1] + hh[2] + hh[3]; + csum = -csum; + mutex_lock(&control->ras_tbl_mutex); hdr->header = header; - ret = __update_table_header(control, buff); - mutex_unlock(&control->tbl_mutex); + hdr->checksum = csum; + res = __write_table_header(control); + mutex_unlock(&control->ras_tbl_mutex); - return ret; + return res; } +/** + * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table + * @control: pointer to control structure + * + * Reset the contents of the header of the RAS EEPROM table. + * Return 0 on success, -errno on error. 
+ */ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) { - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - int ret = 0; + u8 csum; + int res; - mutex_lock(&control->tbl_mutex); + mutex_lock(&control->ras_tbl_mutex); - hdr->header = EEPROM_TABLE_HDR_VAL; - hdr->version = EEPROM_TABLE_VER; - hdr->first_rec_offset = EEPROM_RECORD_START; - hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; + hdr->header = RAS_TABLE_HDR_VAL; + hdr->version = RAS_TABLE_VER; + hdr->first_rec_offset = RAS_RECORD_START; + hdr->tbl_size = RAS_TABLE_HEADER_SIZE; - control->tbl_byte_sum = 0; - __update_tbl_checksum(control, NULL, 0, 0); - control->next_addr = EEPROM_RECORD_START; + csum = __calc_hdr_byte_sum(control); + csum = -csum; + hdr->checksum = csum; + res = __write_table_header(control); - ret = __update_table_header(control, buff); + control->ras_num_recs = 0; + control->ras_fri = 0; - mutex_unlock(&control->tbl_mutex); + amdgpu_ras_debugfs_set_ret_size(control); - return ret; + mutex_unlock(&control->ras_tbl_mutex); + return res; } -int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit) -{ - int ret = 0; - struct amdgpu_device *adev = to_amdgpu_device(control); - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; - struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - struct i2c_msg msg = { - .addr = 0, - .flags = I2C_M_RD, - .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, - .buf = buff, - }; - - *exceed_err_limit = false; - - if (!__is_ras_eeprom_supported(adev)) - return 0; - - /* Verify i2c adapter is initialized */ - if (!adev->pm.smu_i2c.algo) - return -ENOENT; - - if (!__get_eeprom_i2c_addr(adev, &control->i2c_address)) - return -EINVAL; - - mutex_init(&control->tbl_mutex); - - msg.addr = control->i2c_address; - /* Read/Create table header from EEPROM address 0 */ - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); - if (ret < 1) { - DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); - return ret; - } - - __decode_table_header_from_buff(hdr, &buff[2]); - - if (hdr->header == EEPROM_TABLE_HDR_VAL) { - control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / - EEPROM_TABLE_RECORD_SIZE; - control->tbl_byte_sum = __calc_hdr_byte_sum(control); - control->next_addr = EEPROM_RECORD_START; - - DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", - control->num_recs); - - } else if ((hdr->header == EEPROM_TABLE_HDR_BAD) && - (amdgpu_bad_page_threshold != 0)) { - if (ras->bad_page_cnt_threshold > control->num_recs) { - dev_info(adev->dev, "Using one valid bigger bad page " - "threshold and correcting eeprom header tag.\n"); - ret = amdgpu_ras_eeprom_correct_header_tag(control, - EEPROM_TABLE_HDR_VAL); - } else { - *exceed_err_limit = true; - dev_err(adev->dev, "Exceeding the bad_page_threshold parameter, " - "disabling the GPU.\n"); - } - } else { - DRM_INFO("Creating new EEPROM table"); - - ret = amdgpu_ras_eeprom_reset_table(control); - } - - return ret == 1 ? 
0 : -EIO; -} - -static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *record, - unsigned char *buff) +static void +__encode_table_record_to_buf(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buf) { __le64 tmp = 0; int i = 0; /* Next are all record fields according to EEPROM page spec in LE foramt */ - buff[i++] = record->err_type; + buf[i++] = record->err_type; - buff[i++] = record->bank; + buf[i++] = record->bank; tmp = cpu_to_le64(record->ts); - memcpy(buff + i, &tmp, 8); + memcpy(buf + i, &tmp, 8); i += 8; tmp = cpu_to_le64((record->offset & 0xffffffffffff)); - memcpy(buff + i, &tmp, 6); + memcpy(buf + i, &tmp, 6); i += 6; - buff[i++] = record->mem_channel; - buff[i++] = record->mcumc_id; + buf[i++] = record->mem_channel; + buf[i++] = record->mcumc_id; tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); - memcpy(buff + i, &tmp, 6); + memcpy(buf + i, &tmp, 6); } -static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *record, - unsigned char *buff) +static void +__decode_table_record_from_buf(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buf) { __le64 tmp = 0; int i = 0; /* Next are all record fields according to EEPROM page spec in LE foramt */ - record->err_type = buff[i++]; + record->err_type = buf[i++]; - record->bank = buff[i++]; + record->bank = buf[i++]; - memcpy(&tmp, buff + i, 8); + memcpy(&tmp, buf + i, 8); record->ts = le64_to_cpu(tmp); i += 8; - memcpy(&tmp, buff + i, 6); + memcpy(&tmp, buf + i, 6); record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); i += 6; - record->mem_channel = buff[i++]; - record->mcumc_id = buff[i++]; + record->mem_channel = buf[i++]; + record->mcumc_id = buf[i++]; - memcpy(&tmp, buff + i, 6); + memcpy(&tmp, buf + i, 6); record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); } -/* - * When reaching end of EEPROM memory jump back to 0 record address - * When next record access will go beyond EEPROM page boundary modify bits A17/A8 - * in I2C selector to go to next page - */ -static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) -{ - uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE; - - /* When all EEPROM memory used jump back to 0 address */ - if (next_address > EEPROM_SIZE_BYTES) { - DRM_INFO("Reached end of EEPROM memory, jumping to 0 " - "and overriding old record"); - return EEPROM_RECORD_START; - } - - /* - * To check if we overflow page boundary compare next address with - * current and see if bits 17/8 of the EEPROM address will change - * If they do start from the next 256b page - * - * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 
5.1.2 - */ - if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { - DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", - (next_address & EEPROM_ADDR_MSB_MASK)); - - return (next_address & EEPROM_ADDR_MSB_MASK); - } - - return curr_address; -} - bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -454,197 +359,756 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) if (!(con->features & BIT(AMDGPU_RAS_BLOCK__UMC))) return false; - if (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD) { + if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) { dev_warn(adev->dev, "This GPU is in BAD status."); - dev_warn(adev->dev, "Please retire it or setting one bigger " - "threshold value when reloading driver.\n"); + dev_warn(adev->dev, "Please retire it or set a larger " + "threshold value when reloading driver.\n"); return true; } return false; } -int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, - bool write, - int num) +/** + * __amdgpu_ras_eeprom_write -- write indexed from buffer to EEPROM + * @control: pointer to control structure + * @buf: pointer to buffer containing data to write + * @fri: start writing at this index + * @num: number of records to write + * + * The caller must hold the table mutex in @control. + * Return 0 on success, -errno otherwise. + */ +static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control, + u8 *buf, const u32 fri, const u32 num) { - int i, ret = 0; - struct i2c_msg *msgs, *msg; - unsigned char *buffs, *buff; - struct eeprom_table_record *record; struct amdgpu_device *adev = to_amdgpu_device(control); - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u32 buf_size; + int res; - if (!__is_ras_eeprom_supported(adev)) - return 0; + /* i2c may be unstable in gpu reset */ + down_read(&adev->reset_sem); + buf_size = num * RAS_TABLE_RECORD_SIZE; + res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + control->i2c_address + + RAS_INDEX_TO_OFFSET(control, fri), + buf, buf_size); + up_read(&adev->reset_sem); + if (res < 0) { + DRM_ERROR("Writing %d EEPROM table records error:%d", + num, res); + } else if (res < buf_size) { + /* Short write, return error. + */ + DRM_ERROR("Wrote %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); + res = -EIO; + } else { + res = 0; + } - buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE, - GFP_KERNEL); - if (!buffs) - return -ENOMEM; + return res; +} + +static int +amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + const u32 num) +{ + u32 a, b, i; + u8 *buf, *pp; + int res; - mutex_lock(&control->tbl_mutex); + buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; - msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL); - if (!msgs) { - ret = -ENOMEM; - goto free_buff; + /* Encode all of them in one go. + */ + pp = buf; + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + __encode_table_record_to_buf(control, &record[i], pp); + + /* a, first record index to write into. + * b, last record index to write into. + * a = first index to read (fri) + number of records in the table, + * b = a + @num - 1. 
+ * Let N = control->ras_max_num_record_count, then we have, + * case 0: 0 <= a <= b < N, + * just append @num records starting at a; + * case 1: 0 <= a < N <= b, + * append (N - a) records starting at a, and + * append the remainder, b % N + 1, starting at 0. + * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases, + * case 2a: 0 <= a <= b < N + * append num records starting at a; and fix fri if b overwrote it, + * and since a <= b, if b overwrote it then a must've also, + * and if b didn't overwrite it, then a didn't also. + * case 2b: 0 <= b < a < N + * write num records starting at a, which wraps around 0=N + * and overwrite fri unconditionally. Now from case 2a, + * this means that b eclipsed fri to overwrite it and wrap + * around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally + * set fri = b + 1 (mod N). + * Now, since fri is updated in every case, except the trivial case 0, + * the number of records present in the table after writing, is, + * num_recs - 1 = b - fri (mod N), and we take the positive value, + * by adding an arbitrary multiple of N before taking the modulo N + * as shown below. + */ + a = control->ras_fri + control->ras_num_recs; + b = a + num - 1; + if (b < control->ras_max_record_count) { + res = __amdgpu_ras_eeprom_write(control, buf, a, num); + } else if (a < control->ras_max_record_count) { + u32 g0, g1; + + g0 = control->ras_max_record_count - a; + g1 = b % control->ras_max_record_count + 1; + res = __amdgpu_ras_eeprom_write(control, buf, a, g0); + if (res) + goto Out; + res = __amdgpu_ras_eeprom_write(control, + buf + g0 * RAS_TABLE_RECORD_SIZE, + 0, g1); + if (res) + goto Out; + if (g1 > control->ras_fri) + control->ras_fri = g1 % control->ras_max_record_count; + } else { + a %= control->ras_max_record_count; + b %= control->ras_max_record_count; + + if (a <= b) { + /* Note that, b - a + 1 = num. */ + res = __amdgpu_ras_eeprom_write(control, buf, a, num); + if (res) + goto Out; + if (b >= control->ras_fri) + control->ras_fri = (b + 1) % control->ras_max_record_count; + } else { + u32 g0, g1; + + /* b < a, which means, we write from + * a to the end of the table, and from + * the start of the table to b. + */ + g0 = control->ras_max_record_count - a; + g1 = b + 1; + res = __amdgpu_ras_eeprom_write(control, buf, a, g0); + if (res) + goto Out; + res = __amdgpu_ras_eeprom_write(control, + buf + g0 * RAS_TABLE_RECORD_SIZE, + 0, g1); + if (res) + goto Out; + control->ras_fri = g1 % control->ras_max_record_count; + } } + control->ras_num_recs = 1 + (control->ras_max_record_count + b + - control->ras_fri) + % control->ras_max_record_count; +Out: + kfree(buf); + return res; +} - /* - * If saved bad pages number exceeds the bad page threshold for - * the whole VRAM, update table header to mark the BAD GPU tag - * and schedule one ras recovery after eeprom write is done, - * this can avoid the missing for latest records. - * - * This new header will be picked up and checked in the bootup - * by ras recovery, which may break bootup process to notify - * user this GPU is in bad state and to retire such GPU for - * further check. +static int +amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u8 *buf, *pp, csum; + u32 buf_size; + int res; + + /* Modify the header if it exceeds. 
*/ - if (write && (amdgpu_bad_page_threshold != 0) && - ((control->num_recs + num) >= ras->bad_page_cnt_threshold)) { + if (amdgpu_bad_page_threshold != 0 && + control->ras_num_recs >= ras->bad_page_cnt_threshold) { dev_warn(adev->dev, - "Saved bad pages(%d) reaches threshold value(%d).\n", - control->num_recs + num, ras->bad_page_cnt_threshold); - control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD; + "Saved bad pages %d reaches threshold value %d\n", + control->ras_num_recs, ras->bad_page_cnt_threshold); + control->tbl_hdr.header = RAS_TABLE_HDR_BAD; } - /* In case of overflow just start from beginning to not lose newest records */ - if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES)) - control->next_addr = EEPROM_RECORD_START; + control->tbl_hdr.version = RAS_TABLE_VER; + control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri); + control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + control->tbl_hdr.checksum = 0; + + buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); + if (!buf) { + DRM_ERROR("allocating memory for table of size %d bytes failed\n", + control->tbl_hdr.tbl_size); + res = -ENOMEM; + goto Out; + } + + down_read(&adev->reset_sem); + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + + control->ras_record_offset, + buf, buf_size); + up_read(&adev->reset_sem); + if (res < 0) { + DRM_ERROR("EEPROM failed reading records:%d\n", + res); + goto Out; + } else if (res < buf_size) { + DRM_ERROR("EEPROM read %d out of %d bytes\n", + res, buf_size); + res = -EIO; + goto Out; + } - /* - * TODO Currently makes EEPROM writes for each record, this creates - * internal fragmentation. Optimized the code to do full page write of - * 256b + /* Recalc the checksum. */ - for (i = 0; i < num; i++) { - buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - record = &records[i]; - msg = &msgs[i]; + csum = 0; + for (pp = buf; pp < buf + buf_size; pp++) + csum += *pp; + + csum += __calc_hdr_byte_sum(control); + /* avoid sign extension when assigning to "checksum" */ + csum = -csum; + control->tbl_hdr.checksum = csum; + res = __write_table_header(control); +Out: + kfree(buf); + return res; +} - control->next_addr = __correct_eeprom_dest_address(control->next_addr); +/** + * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table + * @control: pointer to control structure + * @record: array of records to append + * @num: number of records in @record array + * + * Append @num records to the table, calculate the checksum and write + * the table back to EEPROM. The maximum number of records that + * can be appended is between 1 and control->ras_max_record_count, + * regardless of how many records are already stored in the table. + * + * Return 0 on success or if EEPROM is not supported, -errno on error. + */ +int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + const u32 num) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int res; - /* - * Update bits 16,17 of EEPROM address in I2C address by setting them - * to bits 1,2 of Device address byte - */ - msg->addr = control->i2c_address | - ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); - msg->flags = write ? 
0 : I2C_M_RD; - msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; - msg->buf = buff; - - /* Insert the EEPROM dest addess, bits 0-15 */ - buff[0] = ((control->next_addr >> 8) & 0xff); - buff[1] = (control->next_addr & 0xff); - - /* EEPROM table content is stored in LE format */ - if (write) - __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE); - - /* - * The destination EEPROM address might need to be corrected to account - * for page or entire memory wrapping - */ - control->next_addr += EEPROM_TABLE_RECORD_SIZE; + if (!__is_ras_eeprom_supported(adev)) + return 0; + + if (num == 0) { + DRM_ERROR("will not append 0 records\n"); + return -EINVAL; + } else if (num > control->ras_max_record_count) { + DRM_ERROR("cannot append %d records than the size of table %d\n", + num, control->ras_max_record_count); + return -EINVAL; } + mutex_lock(&control->ras_tbl_mutex); + + res = amdgpu_ras_eeprom_append_table(control, record, num); + if (!res) + res = amdgpu_ras_eeprom_update_header(control); + if (!res) + amdgpu_ras_debugfs_set_ret_size(control); + + mutex_unlock(&control->ras_tbl_mutex); + return res; +} + +/** + * __amdgpu_ras_eeprom_read -- read indexed from EEPROM into buffer + * @control: pointer to control structure + * @buf: pointer to buffer to read into + * @fri: first record index, start reading at this index, absolute index + * @num: number of records to read + * + * The caller must hold the table mutex in @control. + * Return 0 on success, -errno otherwise. + */ +static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, + u8 *buf, const u32 fri, const u32 num) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + u32 buf_size; + int res; + /* i2c may be unstable in gpu reset */ down_read(&adev->reset_sem); - ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num); + buf_size = num * RAS_TABLE_RECORD_SIZE; + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + + RAS_INDEX_TO_OFFSET(control, fri), + buf, buf_size); up_read(&adev->reset_sem); + if (res < 0) { + DRM_ERROR("Reading %d EEPROM table records error:%d", + num, res); + } else if (res < buf_size) { + /* Short read, return error. + */ + DRM_ERROR("Read %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); + res = -EIO; + } else { + res = 0; + } - if (ret < 1) { - DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret); + return res; +} - /* TODO Restore prev next EEPROM address ? */ - goto free_msgs; +/** + * amdgpu_ras_eeprom_read -- read EEPROM + * @control: pointer to control structure + * @record: array of records to read into + * @num: number of records in @record + * + * Reads num records from the RAS table in EEPROM and + * writes the data into @record array. + * + * Returns 0 on success, -errno on error. 
+ */ +int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + const u32 num) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int i, res; + u8 *buf, *pp; + u32 g0, g1; + + if (!__is_ras_eeprom_supported(adev)) + return 0; + + if (num == 0) { + DRM_ERROR("will not read 0 records\n"); + return -EINVAL; + } else if (num > control->ras_num_recs) { + DRM_ERROR("too many records to read:%d available:%d\n", + num, control->ras_num_recs); + return -EINVAL; } + buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; - if (!write) { - for (i = 0; i < num; i++) { - buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - record = &records[i]; + /* Determine how many records to read, from the first record + * index, fri, to the end of the table, and from the beginning + * of the table, such that the total number of records is + * @num, and we handle wrap around when fri > 0 and + * fri + num > RAS_MAX_RECORD_COUNT. + * + * First we compute the index of the last element + * which would be fetched from each region, + * g0 is in [fri, fri + num - 1], and + * g1 is in [0, RAS_MAX_RECORD_COUNT - 1]. + * Then, if g0 < RAS_MAX_RECORD_COUNT, the index of + * the last element to fetch, we set g0 to _the number_ + * of elements to fetch, @num, since we know that the last + * indexed to be fetched does not exceed the table. + * + * If, however, g0 >= RAS_MAX_RECORD_COUNT, then + * we set g0 to the number of elements to read + * until the end of the table, and g1 to the number of + * elements to read from the beginning of the table. + */ + g0 = control->ras_fri + num - 1; + g1 = g0 % control->ras_max_record_count; + if (g0 < control->ras_max_record_count) { + g0 = num; + g1 = 0; + } else { + g0 = control->ras_max_record_count - control->ras_fri; + g1 += 1; + } - __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE); - } + mutex_lock(&control->ras_tbl_mutex); + res = __amdgpu_ras_eeprom_read(control, buf, control->ras_fri, g0); + if (res) + goto Out; + if (g1) { + res = __amdgpu_ras_eeprom_read(control, + buf + g0 * RAS_TABLE_RECORD_SIZE, + 0, g1); + if (res) + goto Out; } - if (write) { - uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control); + res = 0; - /* - * Update table header with size and CRC and account for table - * wrap around where the assumption is that we treat it as empty - * table - * - * TODO - Check the assumption is correct + /* Read up everything? Then transform. + */ + pp = buf; + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + __decode_table_record_from_buf(control, &record[i], pp); +Out: + kfree(buf); + mutex_unlock(&control->ras_tbl_mutex); + + return res; +} + +inline uint32_t amdgpu_ras_eeprom_max_record_count(void) +{ + return RAS_MAX_RECORD_COUNT; +} + +static ssize_t +amdgpu_ras_debugfs_eeprom_size_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control = ras ? 
&ras->eeprom_control : NULL; + u8 data[50]; + int res; + + if (!size) + return size; + + if (!ras || !control) { + res = snprintf(data, sizeof(data), "Not supported\n"); + } else { + res = snprintf(data, sizeof(data), "%d bytes or %d records\n", + RAS_TBL_SIZE_BYTES, control->ras_max_record_count); + } + + if (*pos >= res) + return 0; + + res -= *pos; + res = min_t(size_t, res, size); + + if (copy_to_user(buf, &data[*pos], res)) + return -EFAULT; + + *pos += res; + + return res; +} + +const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops = { + .owner = THIS_MODULE, + .read = amdgpu_ras_debugfs_eeprom_size_read, + .write = NULL, + .llseek = default_llseek, +}; + +static const char *tbl_hdr_str = " Signature Version FirstOffs Size Checksum\n"; +static const char *tbl_hdr_fmt = "0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n"; +#define tbl_hdr_fmt_size (5 * (2+8) + 4 + 1) +static const char *rec_hdr_str = "Index Offset ErrType Bank/CU TimeStamp Offs/Addr MemChl MCUMCID RetiredPage\n"; +static const char *rec_hdr_fmt = "%5d 0x%05X %7s 0x%02X 0x%016llX 0x%012llX 0x%02X 0x%02X 0x%012llX\n"; +#define rec_hdr_fmt_size (5 + 1 + 7 + 1 + 7 + 1 + 7 + 1 + 18 + 1 + 14 + 1 + 6 + 1 + 7 + 1 + 14 + 1) + +static const char *record_err_type_str[AMDGPU_RAS_EEPROM_ERR_COUNT] = { + "ignore", + "re", + "ue", +}; + +static loff_t amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control *control) +{ + return strlen(tbl_hdr_str) + tbl_hdr_fmt_size + + strlen(rec_hdr_str) + rec_hdr_fmt_size * control->ras_num_recs; +} + +void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_ras *ras = container_of(control, struct amdgpu_ras, + eeprom_control); + struct dentry *de = ras->de_ras_eeprom_table; + + if (de) + d_inode(de)->i_size = amdgpu_ras_debugfs_table_size(control); +} + +static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control = &ras->eeprom_control; + const size_t orig_size = size; + int res = -EFAULT; + size_t data_len; + + mutex_lock(&control->ras_tbl_mutex); + + /* We want *pos - data_len > 0, which means there's + * bytes to be printed from data. 
+ */ + data_len = strlen(tbl_hdr_str); + if (*pos < data_len) { + data_len -= *pos; + data_len = min_t(size_t, data_len, size); + if (copy_to_user(buf, &tbl_hdr_str[*pos], data_len)) + goto Out; + buf += data_len; + size -= data_len; + *pos += data_len; + } + + data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size; + if (*pos < data_len && size > 0) { + u8 data[tbl_hdr_fmt_size + 1]; + loff_t lpos; + + snprintf(data, sizeof(data), tbl_hdr_fmt, + control->tbl_hdr.header, + control->tbl_hdr.version, + control->tbl_hdr.first_rec_offset, + control->tbl_hdr.tbl_size, + control->tbl_hdr.checksum); + + data_len -= *pos; + data_len = min_t(size_t, data_len, size); + lpos = *pos - strlen(tbl_hdr_str); + if (copy_to_user(buf, &data[lpos], data_len)) + goto Out; + buf += data_len; + size -= data_len; + *pos += data_len; + } + + data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size + strlen(rec_hdr_str); + if (*pos < data_len && size > 0) { + loff_t lpos; + + data_len -= *pos; + data_len = min_t(size_t, data_len, size); + lpos = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size; + if (copy_to_user(buf, &rec_hdr_str[lpos], data_len)) + goto Out; + buf += data_len; + size -= data_len; + *pos += data_len; + } + + data_len = amdgpu_ras_debugfs_table_size(control); + if (*pos < data_len && size > 0) { + u8 dare[RAS_TABLE_RECORD_SIZE]; + u8 data[rec_hdr_fmt_size + 1]; + struct eeprom_table_record record; + int s, r; + + /* Find the starting record index */ - control->num_recs += num; - control->num_recs %= EEPROM_MAX_RECORD_NUM; - control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num; - if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES) - control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE + - control->num_recs * EEPROM_TABLE_RECORD_SIZE; - - __update_tbl_checksum(control, records, num, old_hdr_byte_sum); - - __update_table_header(control, buffs); - } else if (!__validate_tbl_checksum(control, records, num)) { - DRM_WARN("EEPROM Table checksum mismatch!"); - /* TODO Uncomment when EEPROM read/write is relliable */ - /* ret = -EIO; */ + s = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size - + strlen(rec_hdr_str); + s = s / rec_hdr_fmt_size; + r = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size - + strlen(rec_hdr_str); + r = r % rec_hdr_fmt_size; + + for ( ; size > 0 && s < control->ras_num_recs; s++) { + u32 ai = RAS_RI_TO_AI(control, s); + /* Read a single record + */ + res = __amdgpu_ras_eeprom_read(control, dare, ai, 1); + if (res) + goto Out; + __decode_table_record_from_buf(control, &record, dare); + snprintf(data, sizeof(data), rec_hdr_fmt, + s, + RAS_INDEX_TO_OFFSET(control, ai), + record_err_type_str[record.err_type], + record.bank, + record.ts, + record.offset, + record.mem_channel, + record.mcumc_id, + record.retired_page); + + data_len = min_t(size_t, rec_hdr_fmt_size - r, size); + if (copy_to_user(buf, &data[r], data_len)) { + res = -EFAULT; + goto Out; + } + buf += data_len; + size -= data_len; + *pos += data_len; + r = 0; + } } + res = 0; +Out: + mutex_unlock(&control->ras_tbl_mutex); + return res < 0 ? res : orig_size - size; +} -free_msgs: - kfree(msgs); +static ssize_t +amdgpu_ras_debugfs_eeprom_table_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control = ras ? 
&ras->eeprom_control : NULL; + u8 data[81]; + int res; + + if (!size) + return size; + + if (!ras || !control) { + res = snprintf(data, sizeof(data), "Not supported\n"); + if (*pos >= res) + return 0; -free_buff: - kfree(buffs); + res -= *pos; + res = min_t(size_t, res, size); - mutex_unlock(&control->tbl_mutex); + if (copy_to_user(buf, &data[*pos], res)) + return -EFAULT; - return ret == num ? 0 : -EIO; + *pos += res; + + return res; + } else { + return amdgpu_ras_debugfs_table_read(f, buf, size, pos); + } } -inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void) +const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops = { + .owner = THIS_MODULE, + .read = amdgpu_ras_debugfs_eeprom_table_read, + .write = NULL, + .llseek = default_llseek, +}; + +/** + * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum + * @control: pointer to control structure + * + * Check the checksum of the stored in EEPROM RAS table. + * + * Return 0 if the checksum is correct, + * positive if it is not correct, and + * -errno on I/O error. + */ +static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control) { - return EEPROM_MAX_RECORD_NUM; + struct amdgpu_device *adev = to_amdgpu_device(control); + int buf_size, res; + u8 csum, *buf, *pp; + + buf_size = RAS_TABLE_HEADER_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) { + DRM_ERROR("Out of memory checking RAS table checksum.\n"); + return -ENOMEM; + } + + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + + control->ras_header_offset, + buf, buf_size); + if (res < buf_size) { + DRM_ERROR("Partial read for checksum, res:%d\n", res); + /* On partial reads, return -EIO. + */ + if (res >= 0) + res = -EIO; + goto Out; + } + + csum = 0; + for (pp = buf; pp < buf + buf_size; pp++) + csum += *pp; +Out: + kfree(buf); + return res < 0 ? res : csum; } -/* Used for testing if bugs encountered */ -#if 0 -void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control) +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, + bool *exceed_err_limit) { - int i; - struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL); + struct amdgpu_device *adev = to_amdgpu_device(control); + unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int res; - if (!recs) - return; + *exceed_err_limit = false; - for (i = 0; i < 1 ; i++) { - recs[i].address = 0xdeadbeef; - recs[i].retired_page = i; + if (!__is_ras_eeprom_supported(adev)) + return 0; + + /* Verify i2c adapter is initialized */ + if (!adev->pm.smu_i2c.algo) + return -ENOENT; + + if (!__get_eeprom_i2c_addr(adev, control)) + return -EINVAL; + + control->ras_header_offset = RAS_HDR_START; + control->ras_record_offset = RAS_RECORD_START; + control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + mutex_init(&control->ras_tbl_mutex); + + /* Read the table header from EEPROM address */ + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + control->ras_header_offset, + buf, RAS_TABLE_HEADER_SIZE); + if (res < RAS_TABLE_HEADER_SIZE) { + DRM_ERROR("Failed to read EEPROM table header, res:%d", res); + return res >= 0 ? 
-EIO : res; } - if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { + __decode_table_header_from_buf(hdr, buf); - memset(recs, 0, sizeof(*recs) * 1); + control->ras_num_recs = RAS_NUM_RECS(hdr); + control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); - control->next_addr = EEPROM_RECORD_START; + if (hdr->header == RAS_TABLE_HDR_VAL) { + DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", + control->ras_num_recs); + res = __verify_ras_table_checksum(control); + if (res) + DRM_ERROR("RAS table incorrect checksum or error:%d\n", + res); + } else if (hdr->header == RAS_TABLE_HDR_BAD && + amdgpu_bad_page_threshold != 0) { + res = __verify_ras_table_checksum(control); + if (res) + DRM_ERROR("RAS Table incorrect checksum or error:%d\n", + res); + if (ras->bad_page_cnt_threshold > control->ras_num_recs) { + /* This means that, the threshold was increased since + * the last time the system was booted, and now, + * ras->bad_page_cnt_threshold - control->num_recs > 0, + * so that at least one more record can be saved, + * before the page count threshold is reached. + */ + dev_info(adev->dev, + "records:%d threshold:%d, resetting " + "RAS table header signature", + control->ras_num_recs, + ras->bad_page_cnt_threshold); + res = amdgpu_ras_eeprom_correct_header_tag(control, + RAS_TABLE_HDR_VAL); + } else { + *exceed_err_limit = true; + dev_err(adev->dev, + "RAS records:%d exceed threshold:%d, " + "maybe retire this GPU?", + control->ras_num_recs, ras->bad_page_cnt_threshold); + } + } else { + DRM_INFO("Creating a new EEPROM table"); - if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) { - for (i = 0; i < 1; i++) - DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu", - recs[i].address, recs[i].retired_page); - } else - DRM_ERROR("Failed in reading from table"); + res = amdgpu_ras_eeprom_reset_table(control); + } - } else - DRM_ERROR("Failed in writing to table"); + return res < 0 ? res : 0; } -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 178721170974..f95fc61b3021 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -28,10 +28,11 @@ struct amdgpu_device; -enum amdgpu_ras_eeprom_err_type{ - AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER, +enum amdgpu_ras_eeprom_err_type { + AMDGPU_RAS_EEPROM_ERR_NA, AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, - AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE + AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE, + AMDGPU_RAS_EEPROM_ERR_COUNT, }; struct amdgpu_ras_eeprom_table_header { @@ -40,15 +41,45 @@ struct amdgpu_ras_eeprom_table_header { uint32_t first_rec_offset; uint32_t tbl_size; uint32_t checksum; -}__attribute__((__packed__)); +} __packed; struct amdgpu_ras_eeprom_control { struct amdgpu_ras_eeprom_table_header tbl_hdr; - uint32_t next_addr; - unsigned int num_recs; - struct mutex tbl_mutex; - uint32_t tbl_byte_sum; - uint16_t i2c_address; // 8-bit represented address + + /* Base I2C EEPPROM 19-bit memory address, + * where the table is located. For more information, + * see top of amdgpu_eeprom.c. + */ + u32 i2c_address; + + /* The byte offset off of @i2c_address + * where the table header is found, + * and where the records start--always + * right after the header. + */ + u32 ras_header_offset; + u32 ras_record_offset; + + /* Number of records in the table. + */ + u32 ras_num_recs; + + /* First record index to read, 0-based. + * Range is [0, num_recs-1]. 
This is + * an absolute index, starting right after + * the table header. + */ + u32 ras_fri; + + /* Maximum possible number of records + * we could store, i.e. the maximum capacity + * of the table. + */ + u32 ras_max_record_count; + + /* Protect table access via this mutex. + */ + struct mutex ras_tbl_mutex; }; /* @@ -74,21 +105,26 @@ struct eeprom_table_record { unsigned char mem_channel; unsigned char mcumc_id; -}__attribute__((__packed__)); +} __packed; int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit); + bool *exceed_err_limit); + int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev); -int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, - bool write, - int num); +int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, const u32 num); + +int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, const u32 num); + +inline uint32_t amdgpu_ras_eeprom_max_record_count(void); -inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void); +void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); -void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control); +extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; +extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; #endif // _AMDGPU_RAS_EEPROM_H diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 40f2adf305bc..acfa207cf970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -28,6 +28,7 @@ #include <drm/drm_mm.h> #include <drm/ttm/ttm_resource.h> +#include <drm/ttm/ttm_range_manager.h> /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { @@ -53,17 +54,18 @@ static inline void amdgpu_res_first(struct ttm_resource *res, { struct drm_mm_node *node; - if (!res || !res->mm_node) { + if (!res || res->mem_type == TTM_PL_SYSTEM) { cur->start = start; cur->size = size; cur->remaining = size; cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); return; } BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - node = res->mm_node; + node = to_ttm_range_mgr_node(res)->mm_nodes; while (start >= node->size << PAGE_SHIFT) start -= node++->size << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 688624ebe421..7b634a1517f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * @irq_src: interrupt source to use for this ring * @irq_type: interrupt type to use for this ring * @hw_prio: ring priority (NORMAL/HIGH) + * @sched_score: optional score atomic shared with other schedulers * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. 
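The wrap-around arithmetic in amdgpu_ras_eeprom_append_table() above, together with the ras_fri/ras_num_recs fields documented in amdgpu_ras_eeprom.h, treats the EEPROM RAS table as a ring buffer: new records are written starting at index (ras_fri + ras_num_recs) mod N, they may wrap past the end of the table, and when they overwrite the oldest entries the first-record index ras_fri advances. A minimal stand-alone user-space sketch of that bookkeeping follows; it is not driver code, and the names N, fri, num_recs and sim_append are illustrative only.

/* Sketch of the circular-table index bookkeeping used when appending
 * RAS records: mirrors the case analysis in the driver comment above.
 */
#include <stdio.h>

struct ras_table_sim {
	unsigned int N;        /* capacity, i.e. ras_max_record_count */
	unsigned int fri;      /* first record index, i.e. ras_fri    */
	unsigned int num_recs; /* records currently in the table      */
};

/* Append 'num' records: print the index range(s) that would be written
 * and update fri/num_recs the same way the driver derives them.
 */
static void sim_append(struct ras_table_sim *t, unsigned int num)
{
	unsigned int a = t->fri + t->num_recs; /* first index to write */
	unsigned int b = a + num - 1;          /* last index to write  */

	if (b < t->N) {
		/* Case 0: no wrap, plain append. */
		printf("write [%u..%u]\n", a, b);
	} else if (a < t->N) {
		/* Case 1: write to the end of the table, then wrap to 0. */
		printf("write [%u..%u] and [0..%u]\n", a, t->N - 1, b % t->N);
		if (b % t->N + 1 > t->fri)
			t->fri = (b % t->N + 1) % t->N;
	} else {
		/* Case 2: both indices already lie past N before the modulo. */
		a %= t->N;
		b %= t->N;
		if (a <= b) {
			printf("write [%u..%u]\n", a, b);
			if (b >= t->fri)
				t->fri = (b + 1) % t->N;
		} else {
			printf("write [%u..%u] and [0..%u]\n", a, t->N - 1, b);
			t->fri = (b + 1) % t->N;
		}
	}
	/* num_recs - 1 == (b - fri) mod N, kept non-negative by adding N. */
	t->num_recs = 1 + (t->N + b % t->N - t->fri) % t->N;
	printf("fri=%u num_recs=%u\n", t->fri, t->num_recs);
}

int main(void)
{
	struct ras_table_sim t = { .N = 8, .fri = 0, .num_recs = 6 };

	sim_append(&t, 3); /* wraps: writes [6..7] and [0..0], fri -> 1 */
	sim_append(&t, 2); /* case 2: a and b both >= N before modulo   */
	return 0;
}

With N = 8 the first append wraps past the end of the table, overwrites index 0 and advances fri to 1; the second append starts entirely past N before the modulo, i.e. case 2 of the driver comment, and advances fri past the two overwritten records while num_recs stays at the full capacity of 8.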
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index ca1622835296..e713d31619fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -48,6 +48,9 @@ #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) +/* fence flag bit to indicate the face is embedded in job*/ +#define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1) + #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) #define AMDGPU_IB_POOL_SIZE (1024 * 1024) @@ -106,8 +109,6 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; -int amdgpu_fence_driver_init(struct amdgpu_device *adev); -void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, @@ -116,9 +117,11 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq_src, unsigned irq_type); -void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); -void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, +void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev); +void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); +int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); +void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job, unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, uint32_t timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 4fc2ce8ce8ab..00afd0dcae86 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -127,8 +127,8 @@ struct amdgpu_rlc_funcs { void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); - void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag); - u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32 flag); + void (*sriov_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, u32 hwip); + u32 (*sriov_rreg)(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index de91d29c9d96..65debb65a5df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -105,7 +105,6 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->sdma.ras_if->sub_block_index = 0; - strcpy(adev->sdma.ras_if->name, "sdma"); } fs_info.head = ih_info->head = *adev->sdma.ras_if; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c index 5369c8dd0764..cc7597a15fe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c @@ -80,12 +80,14 @@ void psp_securedisplay_parse_resp_status(struct psp_context *psp, void psp_prep_securedisplay_cmd_buf(struct 
psp_context *psp, struct securedisplay_cmd **cmd, enum ta_securedisplay_command command_id) { - *cmd = (struct securedisplay_cmd *)psp->securedisplay_context.securedisplay_shared_buf; + *cmd = (struct securedisplay_cmd *)psp->securedisplay_context.context.mem_context.shared_buf; memset(*cmd, 0, sizeof(struct securedisplay_cmd)); (*cmd)->status = TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE; (*cmd)->cmd_id = command_id; } +#if defined(CONFIG_DEBUG_FS) + static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -162,11 +164,13 @@ static const struct file_operations amdgpu_securedisplay_debugfs_ops = { .llseek = default_llseek }; +#endif + void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - if (!adev->psp.securedisplay_context.securedisplay_initialized) + if (!adev->psp.securedisplay_context.context.initialized) return; debugfs_create_file("securedisplay_test", S_IWUSR, adev_to_drm(adev)->primary->debugfs_root, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index b860ec913ac5..484bb3dcec47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs { void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); + u32 (*get_socket_id)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 4e558632a5d2..862eb3c1c4c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -28,6 +28,8 @@ * Christian König <christian.koenig@amd.com> */ +#include <linux/dma-fence-chain.h> + #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) return amdgpu_sync_fence(sync, fence); } +/* Determine based on the owner and mode if we should sync to a fence or not */ +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, + enum amdgpu_sync_mode mode, + void *owner, struct dma_fence *f) +{ + void *fence_owner = amdgpu_sync_get_owner(f); + + /* Always sync to moves, no matter what */ + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) + return true; + + /* We only want to trigger KFD eviction fences on + * evict or move jobs. Skip KFD fences otherwise. + */ + if (fence_owner == AMDGPU_FENCE_OWNER_KFD && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + return false; + + /* Never sync to VM updates either. 
*/ + if (fence_owner == AMDGPU_FENCE_OWNER_VM && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + return false; + + /* Ignore fences depending on the sync mode */ + switch (mode) { + case AMDGPU_SYNC_ALWAYS: + return true; + + case AMDGPU_SYNC_NE_OWNER: + if (amdgpu_sync_same_dev(adev, f) && + fence_owner == owner) + return false; + break; + + case AMDGPU_SYNC_EQ_OWNER: + if (amdgpu_sync_same_dev(adev, f) && + fence_owner != owner) + return false; + break; + + case AMDGPU_SYNC_EXPLICIT: + return false; + } + + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, + "Adding eviction fence to sync obj"); + return true; +} + /** * amdgpu_sync_resv - sync to a reservation object * @@ -210,68 +261,35 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return -EINVAL; /* always sync to the exclusive fence */ - f = dma_resv_get_excl(resv); - r = amdgpu_sync_fence(sync, f); + f = dma_resv_excl_fence(resv); + dma_fence_chain_for_each(f, f) { + struct dma_fence_chain *chain = to_dma_fence_chain(f); - flist = dma_resv_get_list(resv); - if (!flist || r) - return r; + if (amdgpu_sync_test_fence(adev, mode, owner, chain ? + chain->fence : f)) { + r = amdgpu_sync_fence(sync, f); + dma_fence_put(f); + if (r) + return r; + break; + } + } - for (i = 0; i < flist->shared_count; ++i) { - void *fence_owner; + flist = dma_resv_shared_list(resv); + if (!flist) + return 0; + for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], dma_resv_held(resv)); - fence_owner = amdgpu_sync_get_owner(f); - - /* Always sync to moves, no matter what */ - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) { + if (amdgpu_sync_test_fence(adev, mode, owner, f)) { r = amdgpu_sync_fence(sync, f); if (r) - break; - } - - /* We only want to trigger KFD eviction fences on - * evict or move jobs. Skip KFD fences otherwise. - */ - if (fence_owner == AMDGPU_FENCE_OWNER_KFD && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) - continue; - - /* Never sync to VM updates either. 
*/ - if (fence_owner == AMDGPU_FENCE_OWNER_VM && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) - continue; - - /* Ignore fences depending on the sync mode */ - switch (mode) { - case AMDGPU_SYNC_ALWAYS: - break; - - case AMDGPU_SYNC_NE_OWNER: - if (amdgpu_sync_same_dev(adev, f) && - fence_owner == owner) - continue; - break; - - case AMDGPU_SYNC_EQ_OWNER: - if (amdgpu_sync_same_dev(adev, f) && - fence_owner != owner) - continue; - break; - - case AMDGPU_SYNC_EXPLICIT: - continue; + return r; } - - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, - "Adding eviction fence to sync obj"); - r = amdgpu_sync_fence(sync, f); - if (r) - break; } - return r; + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 792d20261846..d855cb53c7e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -127,8 +127,8 @@ TRACE_EVENT(amdgpu_bo_create, TP_fast_assign( __entry->bo = bo; - __entry->pages = bo->tbo.mem.num_pages; - __entry->type = bo->tbo.mem.mem_type; + __entry->pages = bo->tbo.resource->num_pages; + __entry->type = bo->tbo.resource->mem_type; __entry->prefer = bo->preferred_domains; __entry->allow = bo->allowed_domains; __entry->visible = bo->flags; @@ -176,10 +176,10 @@ TRACE_EVENT(amdgpu_cs_ioctl, TP_fast_assign( __entry->sched_job_id = job->base.id; - __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) + __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) + __assign_str(ring, to_amdgpu_ring(job->base.sched)->name); __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -201,10 +201,10 @@ TRACE_EVENT(amdgpu_sched_run_job, TP_fast_assign( __entry->sched_job_id = job->base.id; - __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) + __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring, to_amdgpu_ring(job->base.sched)->name) + __assign_str(ring, to_amdgpu_ring(job->base.sched)->name); __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -229,7 +229,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_fast_assign( __entry->pasid = vm->pasid; - __assign_str(ring, ring->name) + __assign_str(ring, ring->name); __entry->vmid = job->vmid; __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; @@ -424,7 +424,7 @@ TRACE_EVENT(amdgpu_vm_flush, ), TP_fast_assign( - __assign_str(ring, ring->name) + __assign_str(ring, ring->name); __entry->vmid = vmid; __entry->vm_hub = ring->funcs->vmhub; __entry->pd_addr = pd_addr; @@ -525,7 +525,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, ), TP_fast_assign( - __assign_str(ring, sched_job->base.sched->name) + __assign_str(ring, sched_job->base.sched->name); __entry->id = sched_job->base.id; __entry->fence = fence; __entry->ctx = fence->context; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5cbc51c5eaa..38dade421d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -32,7 +32,6 @@ #include <linux/dma-mapping.h> #include <linux/iommu.h> -#include <linux/hmm.h> #include <linux/pagemap.h> 
#include <linux/sched/task.h> #include <linux/sched/mm.h> @@ -46,6 +45,7 @@ #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_range_manager.h> #include <drm/amdgpu_drm.h> @@ -112,7 +112,22 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } abo = ttm_to_amdgpu_bo(bo); - switch (bo->mem.mem_type) { + if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { + struct dma_fence *fence; + struct dma_resv *resv = &bo->base._resv; + + rcu_read_lock(); + fence = rcu_dereference(resv->fence_excl); + if (fence && !fence->ops->signaled) + dma_fence_enable_sw_signaling(fence); + + placement->num_placement = 0; + placement->num_busy_placement = 0; + rcu_read_unlock(); + return; + } + + switch (bo->resource->mem_type) { case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: case AMDGPU_PL_OA: @@ -134,17 +149,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, * BOs to be evicted from VRAM */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT); + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_CPU); abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; abo->placement.busy_placement = &abo->placements[1]; abo->placement.num_busy_placement = 1; } else { /* Move to GTT memory */ - amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); + amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_CPU); } break; case TTM_PL_TT: + case AMDGPU_PL_PREEMPT: default: amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); break; @@ -153,32 +171,6 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } /** - * amdgpu_verify_access - Verify access for a mmap call - * - * @bo: The buffer object to map - * @filp: The file pointer from the process performing the mmap - * - * This is called by ttm_bo_mmap() to verify whether a process - * has the right to mmap a BO to their process space. - */ -static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) -{ - struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); - - /* - * Don't verify access for KFD BOs. They don't have a GEM - * object associated with them. 
- */ - if (abo->kfd_bo) - return 0; - - if (amdgpu_ttm_tt_get_usermm(bo->ttm)) - return -EPERM; - return drm_vma_node_verify_access(&abo->tbo.base.vma_node, - filp->private_data); -} - -/** * amdgpu_ttm_map_buffer - Map memory into the GART windows * @bo: buffer object to map * @mem: memory object to map @@ -211,6 +203,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT); /* Map only what can't be accessed directly */ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) { @@ -288,7 +281,7 @@ error_free: } /** - * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * amdgpu_ttm_copy_mem_to_mem - Helper function for copy * @adev: amdgpu device * @src: buffer/address where to read from * @dst: buffer/address where to write to @@ -471,10 +464,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, { struct amdgpu_device *adev; struct amdgpu_bo *abo; - struct ttm_resource *old_mem = &bo->mem; + struct ttm_resource *old_mem = bo->resource; int r; - if (new_mem->mem_type == TTM_PL_TT) { + if (new_mem->mem_type == TTM_PL_TT || + new_mem->mem_type == AMDGPU_PL_PREEMPT) { r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem); if (r) return r; @@ -492,18 +486,20 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, goto out; } if (old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_TT) { + (new_mem->mem_type == TTM_PL_TT || + new_mem->mem_type == AMDGPU_PL_PREEMPT)) { ttm_bo_move_null(bo, new_mem); goto out; } - if (old_mem->mem_type == TTM_PL_TT && + if ((old_mem->mem_type == TTM_PL_TT || + old_mem->mem_type == AMDGPU_PL_PREEMPT) && new_mem->mem_type == TTM_PL_SYSTEM) { r = ttm_bo_wait_ctx(bo, ctx); if (r) return r; amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); - ttm_resource_free(bo, &bo->mem); + ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); goto out; } @@ -527,7 +523,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, hop->fpfn = 0; hop->lpfn = 0; hop->mem_type = TTM_PL_TT; - hop->flags = 0; + hop->flags = TTM_PL_FLAG_TEMPORARY; return -EMULTIHOP; } @@ -570,10 +566,10 @@ out: * * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() */ -static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) +static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, + struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct drm_mm_node *mm_node = mem->mm_node; size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT; switch (mem->mem_type) { @@ -581,27 +577,21 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resourc /* system memory */ return 0; case TTM_PL_TT: + case AMDGPU_PL_PREEMPT: break; case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; /* check if it's visible */ if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) return -EINVAL; - /* Only physically contiguous buffers apply. In a contiguous - * buffer, size of the first mm_node would match the number of - * pages in ttm_resource. 
- */ + if (adev->mman.aper_base_kaddr && - (mm_node->size == mem->num_pages)) + mem->placement & TTM_PL_FLAG_CONTIGUOUS) mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + mem->bus.offset; mem->bus.offset += adev->gmc.aper_base; mem->bus.is_iomem = true; - if (adev->gmc.xgmi.connected_to_cpu) - mem->bus.caching = ttm_cached; - else - mem->bus.caching = ttm_write_combined; break; default: return -EINVAL; @@ -615,7 +605,8 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_res_cursor cursor; - amdgpu_res_first(&bo->mem, (u64)page_offset << PAGE_SHIFT, 0, &cursor); + amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, + &cursor); return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT; } @@ -670,10 +661,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; - struct hmm_range *range; - unsigned long timeout; struct mm_struct *mm; - unsigned long i; + bool readonly; int r = 0; mm = bo->notifier.mm; @@ -689,28 +678,9 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; - range = kzalloc(sizeof(*range), GFP_KERNEL); - if (unlikely(!range)) { - r = -ENOMEM; - goto out; - } - range->notifier = &bo->notifier; - range->start = bo->notifier.interval_tree.start; - range->end = bo->notifier.interval_tree.last + 1; - range->default_flags = HMM_PFN_REQ_FAULT; - if (!amdgpu_ttm_tt_is_readonly(ttm)) - range->default_flags |= HMM_PFN_REQ_WRITE; - - range->hmm_pfns = kvmalloc_array(ttm->num_pages, - sizeof(*range->hmm_pfns), GFP_KERNEL); - if (unlikely(!range->hmm_pfns)) { - r = -ENOMEM; - goto out_free_ranges; - } - mmap_read_lock(mm); - vma = find_vma(mm, start); - if (unlikely(!vma || start < vma->vm_start)) { + vma = vma_lookup(mm, start); + if (unlikely(!vma)) { r = -EFAULT; goto out_unlock; } @@ -719,46 +689,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) r = -EPERM; goto out_unlock; } - mmap_read_unlock(mm); - timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - -retry: - range->notifier_seq = mmu_interval_read_begin(&bo->notifier); - - mmap_read_lock(mm); - r = hmm_range_fault(range); - mmap_read_unlock(mm); - if (unlikely(r)) { - /* - * FIXME: This timeout should encompass the retry from - * mmu_interval_read_retry() as well. - */ - if (r == -EBUSY && !time_after(jiffies, timeout)) - goto retry; - goto out_free_pfns; - } - - /* - * Due to default_flags, all pages are HMM_PFN_VALID or - * hmm_range_fault() fails. FIXME: The pages cannot be touched outside - * the notifier_lock, and mmu_interval_read_retry() must be done first. - */ - for (i = 0; i < ttm->num_pages; i++) - pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); - - gtt->range = range; - mmput(mm); - - return 0; + readonly = amdgpu_ttm_tt_is_readonly(ttm); + r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, + ttm->num_pages, >t->range, readonly, + true, NULL); out_unlock: mmap_read_unlock(mm); -out_free_pfns: - kvfree(range->hmm_pfns); -out_free_ranges: - kfree(range); -out: mmput(mm); + return r; } @@ -787,10 +726,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) * FIXME: Must always hold notifier_lock for this, and must * not ignore the return code. 
*/ - r = mmu_interval_read_retry(gtt->range->notifier, - gtt->range->notifier_seq); - kvfree(gtt->range->hmm_pfns); - kfree(gtt->range); + r = amdgpu_hmm_range_get_pages_done(gtt->range); gtt->range = NULL; } @@ -903,7 +839,7 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, uint64_t page_idx = 1; r = amdgpu_gart_bind(adev, gtt->offset, page_idx, - ttm->pages, gtt->ttm.dma_address, flags); + gtt->ttm.dma_address, flags); if (r) goto gart_bind_fail; @@ -917,11 +853,10 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, r = amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), ttm->num_pages - page_idx, - &ttm->pages[page_idx], &(gtt->ttm.dma_address[page_idx]), flags); } else { r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, - ttm->pages, gtt->ttm.dma_address, flags); + gtt->ttm.dma_address, flags); } gart_bind_fail: @@ -959,7 +894,23 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, DRM_ERROR("failed to pin userptr\n"); return r; } + } else if (ttm->page_flags & TTM_PAGE_FLAG_SG) { + if (!ttm->sg) { + struct dma_buf_attachment *attach; + struct sg_table *sgt; + + attach = gtt->gobj->import_attach; + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + ttm->sg = sgt; + } + + drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address, + ttm->num_pages); } + if (!ttm->num_pages) { WARN(1, "nothing to bind %u pages for mreg %p back %p!\n", ttm->num_pages, bo_mem, ttm); @@ -970,7 +921,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; - if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { + if (bo_mem->mem_type != TTM_PL_TT || + !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { gtt->offset = AMDGPU_BO_INVALID_OFFSET; return 0; } @@ -981,7 +933,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, /* bind pages into GART page tables */ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, - ttm->pages, gtt->ttm.dma_address, flags); + gtt->ttm.dma_address, flags); if (r) DRM_ERROR("failed to bind %u pages at 0x%08llX\n", @@ -1003,51 +955,50 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - struct ttm_resource tmp; struct ttm_placement placement; struct ttm_place placements; + struct ttm_resource *tmp; uint64_t addr, flags; int r; - if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) + if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET) return 0; addr = amdgpu_gmc_agp_addr(bo); if (addr != AMDGPU_BO_INVALID_OFFSET) { - bo->mem.start = addr >> PAGE_SHIFT; - } else { + bo->resource->start = addr >> PAGE_SHIFT; + return 0; + } - /* allocate GART space */ - tmp = bo->mem; - tmp.mm_node = NULL; - placement.num_placement = 1; - placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; - placements.fpfn = 0; - placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; - placements.mem_type = TTM_PL_TT; - placements.flags = bo->mem.placement; - - r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); - if (unlikely(r)) - return r; + /* allocate GART space */ + placement.num_placement = 1; + placement.placement = &placements; + placement.num_busy_placement = 1; + placement.busy_placement = &placements; + placements.fpfn = 0; + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; + 
placements.mem_type = TTM_PL_TT; + placements.flags = bo->resource->placement; - /* compute PTE flags for this buffer object */ - flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); + r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); + if (unlikely(r)) + return r; - /* Bind pages */ - gtt->offset = (u64)tmp.start << PAGE_SHIFT; - r = amdgpu_ttm_gart_bind(adev, bo, flags); - if (unlikely(r)) { - ttm_resource_free(bo, &tmp); - return r; - } + /* compute PTE flags for this buffer object */ + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp); - ttm_resource_free(bo, &bo->mem); - bo->mem = tmp; + /* Bind pages */ + gtt->offset = (u64)tmp->start << PAGE_SHIFT; + r = amdgpu_ttm_gart_bind(adev, bo, flags); + if (unlikely(r)) { + ttm_resource_free(bo, &tmp); + return r; } + amdgpu_gart_invalidate_tlb(adev); + ttm_resource_free(bo, &bo->resource); + ttm_bo_assign_mem(bo, tmp); + return 0; } @@ -1066,7 +1017,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) if (!tbo->ttm) return 0; - flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem); + flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource); r = amdgpu_ttm_gart_bind(adev, tbo, flags); return r; @@ -1086,8 +1037,15 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, int r; /* if the pages have userptr pinning then clear that first */ - if (gtt->userptr) + if (gtt->userptr) { amdgpu_ttm_tt_unpin_userptr(bdev, ttm); + } else if (ttm->sg && gtt->gobj->import_attach) { + struct dma_buf_attachment *attach; + + attach = gtt->gobj->import_attach; + dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); + ttm->sg = NULL; + } if (!gtt->bound) return; @@ -1165,32 +1123,15 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, struct amdgpu_ttm_tt *gtt = (void *)ttm; /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ - if (gtt && gtt->userptr) { + if (gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; - - ttm->page_flags |= TTM_PAGE_FLAG_SG; return 0; } - if (ttm->page_flags & TTM_PAGE_FLAG_SG) { - if (!ttm->sg) { - struct dma_buf_attachment *attach; - struct sg_table *sgt; - - attach = gtt->gobj->import_attach; - sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); - if (IS_ERR(sgt)) - return PTR_ERR(sgt); - - ttm->sg = sgt; - } - - drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address, - ttm->num_pages); + if (ttm->page_flags & TTM_PAGE_FLAG_SG) return 0; - } return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); } @@ -1207,20 +1148,10 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, struct amdgpu_ttm_tt *gtt = (void *)ttm; struct amdgpu_device *adev; - if (gtt && gtt->userptr) { + if (gtt->userptr) { amdgpu_ttm_tt_set_user_pages(ttm, NULL); kfree(ttm->sg); ttm->sg = NULL; - ttm->page_flags &= ~TTM_PAGE_FLAG_SG; - return; - } - - if (ttm->sg && gtt->gobj->import_attach) { - struct dma_buf_attachment *attach; - - attach = gtt->gobj->import_attach; - dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); - ttm->sg = NULL; return; } @@ -1254,6 +1185,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, return -ENOMEM; } + /* Set TTM_PAGE_FLAG_SG before populate but after create. 
*/ + bo->ttm->page_flags |= TTM_PAGE_FLAG_SG; + gtt = (void *)bo->ttm; gtt->userptr = addr; gtt->userflags = flags; @@ -1347,7 +1281,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) if (mem && mem->mem_type != TTM_PL_SYSTEM) flags |= AMDGPU_PTE_VALID; - if (mem && mem->mem_type == TTM_PL_TT) { + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) { flags |= AMDGPU_PTE_SYSTEM; if (ttm->caching == ttm_cached) @@ -1396,12 +1331,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { - unsigned long num_pages = bo->mem.num_pages; + unsigned long num_pages = bo->resource->num_pages; struct amdgpu_res_cursor cursor; struct dma_resv_list *flist; struct dma_fence *f; int i; + /* Swapout? */ + if (bo->resource->mem_type == TTM_PL_SYSTEM) + return true; + if (bo->type == ttm_bo_type_kernel && !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo))) return false; @@ -1410,7 +1349,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = dma_resv_get_list(bo->base.resv); + flist = dma_resv_shared_list(bo->base.resv); if (flist) { for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], @@ -1420,7 +1359,16 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, } } - switch (bo->mem.mem_type) { + switch (bo->resource->mem_type) { + case AMDGPU_PL_PREEMPT: + /* Preemptible BOs don't own system resources managed by the + * driver (pages, VRAM, GART space). They point to resources + * owned by someone else (e.g. pageable memory in user mode + * or a DMABuf). They are used in a preemptible context so we + * can guarantee no deadlocks and good QoS in case of MMU + * notifiers or DMABuf move notifiers from the resource owner. 
+ */ + return false; case TTM_PL_TT: if (amdgpu_bo_is_amdgpu_bo(bo) && amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) @@ -1429,7 +1377,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, case TTM_PL_VRAM: /* Check each drm MM node individually */ - amdgpu_res_first(&bo->mem, 0, (u64)num_pages << PAGE_SHIFT, + amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT, &cursor); while (cursor.remaining) { if (place->fpfn < PFN_DOWN(cursor.start + cursor.size) @@ -1448,6 +1396,41 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return ttm_bo_eviction_valuable(bo, place); } +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) +{ + while (size) { + uint64_t aligned_pos = ALIGN_DOWN(pos, 4); + uint64_t bytes = 4 - (pos & 0x3); + uint32_t shift = (pos & 0x3) * 8; + uint32_t mask = 0xffffffff << shift; + uint32_t value = 0; + + if (size < bytes) { + mask &= 0xffffffff >> (bytes - size) * 8; + bytes = size; + } + + if (mask != 0xffffffff) { + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true); + } else { + value = (value & mask) >> shift; + memcpy(buf, &value, bytes); + } + } else { + amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write); + } + + pos += bytes; + buf += bytes; + size -= bytes; + } +} + /** * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. * @@ -1467,50 +1450,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); struct amdgpu_res_cursor cursor; - unsigned long flags; - uint32_t value = 0; int ret = 0; - if (bo->mem.mem_type != TTM_PL_VRAM) + if (bo->resource->mem_type != TTM_PL_VRAM) return -EIO; - amdgpu_res_first(&bo->mem, offset, len, &cursor); + amdgpu_res_first(bo->resource, offset, len, &cursor); while (cursor.remaining) { - uint64_t aligned_pos = cursor.start & ~(uint64_t)3; - uint64_t bytes = 4 - (cursor.start & 3); - uint32_t shift = (cursor.start & 3) * 8; - uint32_t mask = 0xffffffff << shift; - - if (cursor.size < bytes) { - mask &= 0xffffffff >> (bytes - cursor.size) * 8; - bytes = cursor.size; + size_t count, size = cursor.size; + loff_t pos = cursor.start; + + count = amdgpu_device_aper_access(adev, pos, buf, size, write); + size -= count; + if (size) { + /* using MM to access rest vram and handle un-aligned address */ + pos += count; + buf += count; + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write); } - if (mask != 0xffffffff) { - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); - value = RREG32_NO_KIQ(mmMM_DATA); - if (write) { - value &= ~mask; - value |= (*(uint32_t *)buf << shift) & mask; - WREG32_NO_KIQ(mmMM_DATA, value); - } - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - if (!write) { - value = (value & mask) >> shift; - memcpy(buf, &value, bytes); - } - } else { - bytes = cursor.size & ~0x3ULL; - amdgpu_device_vram_access(adev, cursor.start, - (uint32_t *)buf, bytes, - write); - } - - ret += bytes; - buf = (uint8_t *)buf + bytes; - amdgpu_res_next(&cursor, bytes); + ret += cursor.size; + buf += cursor.size; + amdgpu_res_next(&cursor, cursor.size); } return ret; @@ -1530,7 +1491,6 @@ static struct 
ttm_device_funcs amdgpu_bo_driver = { .eviction_valuable = amdgpu_ttm_bo_eviction_valuable, .evict_flags = &amdgpu_evict_flags, .move = &amdgpu_bo_move, - .verify_access = &amdgpu_verify_access, .delete_mem_notify = &amdgpu_bo_delete_mem_notify, .release_notify = &amdgpu_bo_release_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, @@ -1633,11 +1593,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) bool mem_train_support = false; if (!amdgpu_sriov_vf(adev)) { - ret = amdgpu_mem_train_support(adev); - if (ret == 1) + if (amdgpu_atomfirmware_mem_training_supported(adev)) mem_train_support = true; - else if (ret == -1) - return -EINVAL; else DRM_DEBUG("memory training does not support!\n"); } @@ -1779,6 +1736,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) NULL); if (r) return r; + r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, + adev->mman.stolen_reserved_size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mman.stolen_reserved_memory, + NULL); + if (r) + return r; DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1805,6 +1769,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned)(gtt_size / (1024 * 1024))); + /* Initialize preemptible memory pool */ + r = amdgpu_preempt_mgr_init(adev); + if (r) { + DRM_ERROR("Failed initializing PREEMPT heap.\n"); + return r; + } + /* Initialize various on-chip memory pools */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size); if (r) { @@ -1841,14 +1812,14 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); /* return the IP Discovery TMR memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + if (adev->mman.stolen_reserved_size) + amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, + NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; - amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); + amdgpu_preempt_mgr_fini(adev); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); @@ -1905,50 +1876,6 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) adev->mman.buffer_funcs_enabled = enable; } -static vm_fault_t amdgpu_ttm_fault(struct vm_fault *vmf) -{ - struct ttm_buffer_object *bo = vmf->vma->vm_private_data; - vm_fault_t ret; - - ret = ttm_bo_vm_reserve(bo, vmf); - if (ret) - return ret; - - ret = amdgpu_bo_fault_reserve_notify(bo); - if (ret) - goto unlock; - - ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT, 1); - if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - return ret; - -unlock: - dma_resv_unlock(bo->base.resv); - return ret; -} - -static const struct vm_operations_struct amdgpu_ttm_vm_ops = { - .fault = amdgpu_ttm_fault, - .open = ttm_bo_vm_open, - .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access -}; - -int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_file *file_priv = filp->private_data; - struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev); - int r; - - r = ttm_bo_mmap(filp, vma, &adev->mman.bdev); - if (unlikely(r != 0)) - return r; - - vma->vm_ops = &amdgpu_ttm_vm_ops; - return 0; -} - int amdgpu_copy_buffer(struct 
amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, @@ -2043,16 +1970,21 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, return -EINVAL; } - if (bo->tbo.mem.mem_type == TTM_PL_TT) { + if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) { + DRM_ERROR("Trying to clear preemptible memory.\n"); + return -EINVAL; + } + + if (bo->tbo.resource->mem_type == TTM_PL_TT) { r = amdgpu_ttm_alloc_gart(&bo->tbo); if (r) return r; } - num_bytes = bo->tbo.mem.num_pages << PAGE_SHIFT; + num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT; num_loops = 0; - amdgpu_res_first(&bo->tbo.mem, 0, num_bytes, &cursor); + amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor); while (cursor.remaining) { num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes); amdgpu_res_next(&cursor, cursor.size); @@ -2077,12 +2009,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, } } - amdgpu_res_first(&bo->tbo.mem, 0, num_bytes, &cursor); + amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor); while (cursor.remaining) { uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes); uint64_t dst_addr = cursor.start; - dst_addr += amdgpu_ttm_domain_start(adev, bo->tbo.mem.mem_type); + dst_addr += amdgpu_ttm_domain_start(adev, + bo->tbo.resource->mem_type); amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr, cur_size); @@ -2228,7 +2161,6 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, return -ENXIO; while (size) { - unsigned long flags; uint32_t value; if (*pos >= adev->gmc.mc_vram_size) @@ -2238,11 +2170,7 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, if (r) return r; - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); - WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); - WREG32_NO_KIQ(mmMM_DATA, value); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + amdgpu_device_mm_access(adev, *pos, &value, 4, true); result += 4; buf += 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9e38475e0f8d..3205fd520060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -31,17 +31,13 @@ #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) +#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 #define AMDGPU_POISON 0xd0bed0be -struct amdgpu_vram_reservation { - struct list_head node; - struct drm_mm_node mm_node; -}; - struct amdgpu_vram_mgr { struct ttm_resource_manager manager; struct drm_mm mm; @@ -56,7 +52,12 @@ struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; spinlock_t lock; - atomic64_t available; + atomic64_t used; +}; + +struct amdgpu_preempt_mgr { + struct ttm_resource_manager manager; + atomic64_t used; }; struct amdgpu_mman { @@ -75,6 +76,7 @@ struct amdgpu_mman { struct amdgpu_vram_mgr vram_mgr; struct amdgpu_gtt_mgr gtt_mgr; + struct amdgpu_preempt_mgr preempt_mgr; uint64_t stolen_vga_size; struct amdgpu_bo *stolen_vga_memory; @@ -82,6 +84,10 @@ struct amdgpu_mman { struct amdgpu_bo *stolen_extended_memory; bool keep_stolen_vga_memory; + struct amdgpu_bo *stolen_reserved_memory; + uint64_t stolen_reserved_offset; + uint64_t stolen_reserved_size; + /* discovery */ uint8_t *discovery_bin; uint32_t discovery_tmr_size; @@ -102,6 +108,8 @@ struct amdgpu_copy_mem { int 
amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev); +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev); int amdgpu_vram_mgr_init(struct amdgpu_device *adev); void amdgpu_vram_mgr_fini(struct amdgpu_device *adev); @@ -109,6 +117,8 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man); int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man); +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man); + u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo); int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, struct ttm_resource *mem, @@ -147,7 +157,6 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct dma_resv *resv, struct dma_fence **fence); -int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 9733224117e3..abd8469380e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -257,36 +257,36 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr) container_of(hdr, struct psp_firmware_header_v1_0, header); DRM_DEBUG("ucode_feature_version: %u\n", - le32_to_cpu(psp_hdr->ucode_feature_version)); + le32_to_cpu(psp_hdr->sos.fw_version)); DRM_DEBUG("sos_offset_bytes: %u\n", - le32_to_cpu(psp_hdr->sos_offset_bytes)); + le32_to_cpu(psp_hdr->sos.offset_bytes)); DRM_DEBUG("sos_size_bytes: %u\n", - le32_to_cpu(psp_hdr->sos_size_bytes)); + le32_to_cpu(psp_hdr->sos.size_bytes)); if (version_minor == 1) { const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 = container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0); DRM_DEBUG("toc_header_version: %u\n", - le32_to_cpu(psp_hdr_v1_1->toc_header_version)); + le32_to_cpu(psp_hdr_v1_1->toc.fw_version)); DRM_DEBUG("toc_offset_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_1->toc_offset_bytes)); + le32_to_cpu(psp_hdr_v1_1->toc.offset_bytes)); DRM_DEBUG("toc_size_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_1->toc_size_bytes)); + le32_to_cpu(psp_hdr_v1_1->toc.size_bytes)); DRM_DEBUG("kdb_header_version: %u\n", - le32_to_cpu(psp_hdr_v1_1->kdb_header_version)); + le32_to_cpu(psp_hdr_v1_1->kdb.fw_version)); DRM_DEBUG("kdb_offset_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_1->kdb_offset_bytes)); + le32_to_cpu(psp_hdr_v1_1->kdb.offset_bytes)); DRM_DEBUG("kdb_size_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes)); + le32_to_cpu(psp_hdr_v1_1->kdb.size_bytes)); } if (version_minor == 2) { const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 = container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0); DRM_DEBUG("kdb_header_version: %u\n", - le32_to_cpu(psp_hdr_v1_2->kdb_header_version)); + le32_to_cpu(psp_hdr_v1_2->kdb.fw_version)); DRM_DEBUG("kdb_offset_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes)); + le32_to_cpu(psp_hdr_v1_2->kdb.offset_bytes)); DRM_DEBUG("kdb_size_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); + le32_to_cpu(psp_hdr_v1_2->kdb.size_bytes)); } if (version_minor == 3) { const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 = @@ -294,23 +294,23 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr) const 
struct psp_firmware_header_v1_3 *psp_hdr_v1_3 = container_of(psp_hdr_v1_1, struct psp_firmware_header_v1_3, v1_1); DRM_DEBUG("toc_header_version: %u\n", - le32_to_cpu(psp_hdr_v1_3->v1_1.toc_header_version)); + le32_to_cpu(psp_hdr_v1_3->v1_1.toc.fw_version)); DRM_DEBUG("toc_offset_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_3->v1_1.toc_offset_bytes)); + le32_to_cpu(psp_hdr_v1_3->v1_1.toc.offset_bytes)); DRM_DEBUG("toc_size_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_3->v1_1.toc_size_bytes)); + le32_to_cpu(psp_hdr_v1_3->v1_1.toc.size_bytes)); DRM_DEBUG("kdb_header_version: %u\n", - le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_header_version)); + le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.fw_version)); DRM_DEBUG("kdb_offset_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_offset_bytes)); + le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.offset_bytes)); DRM_DEBUG("kdb_size_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_size_bytes)); + le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.size_bytes)); DRM_DEBUG("spl_header_version: %u\n", - le32_to_cpu(psp_hdr_v1_3->spl_header_version)); + le32_to_cpu(psp_hdr_v1_3->spl.fw_version)); DRM_DEBUG("spl_offset_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_3->spl_offset_bytes)); + le32_to_cpu(psp_hdr_v1_3->spl.offset_bytes)); DRM_DEBUG("spl_size_bytes: %u\n", - le32_to_cpu(psp_hdr_v1_3->spl_size_bytes)); + le32_to_cpu(psp_hdr_v1_3->spl.size_bytes)); } } else { DRM_ERROR("Unknown PSP ucode version: %u.%u\n", @@ -403,10 +403,18 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: case CHIP_ALDEBARAN: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_CYAN_SKILLFISH: + if (!(load_type && + adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_PSP; default: DRM_ERROR("Unknown firmware load type\n"); } @@ -414,6 +422,84 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; } +const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) +{ + switch (ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + return "SDMA0"; + case AMDGPU_UCODE_ID_SDMA1: + return "SDMA1"; + case AMDGPU_UCODE_ID_SDMA2: + return "SDMA2"; + case AMDGPU_UCODE_ID_SDMA3: + return "SDMA3"; + case AMDGPU_UCODE_ID_SDMA4: + return "SDMA4"; + case AMDGPU_UCODE_ID_SDMA5: + return "SDMA5"; + case AMDGPU_UCODE_ID_SDMA6: + return "SDMA6"; + case AMDGPU_UCODE_ID_SDMA7: + return "SDMA7"; + case AMDGPU_UCODE_ID_CP_CE: + return "CP_CE"; + case AMDGPU_UCODE_ID_CP_PFP: + return "CP_PFP"; + case AMDGPU_UCODE_ID_CP_ME: + return "CP_ME"; + case AMDGPU_UCODE_ID_CP_MEC1: + return "CP_MEC1"; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + return "CP_MEC1_JT"; + case AMDGPU_UCODE_ID_CP_MEC2: + return "CP_MEC2"; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + return "CP_MEC2_JT"; + case AMDGPU_UCODE_ID_CP_MES: + return "CP_MES"; + case AMDGPU_UCODE_ID_CP_MES_DATA: + return "CP_MES_DATA"; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + return "RLC_RESTORE_LIST_CNTL"; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + return "RLC_RESTORE_LIST_GPM_MEM"; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + return "RLC_RESTORE_LIST_SRM_MEM"; + case AMDGPU_UCODE_ID_RLC_IRAM: + return "RLC_IRAM"; + case AMDGPU_UCODE_ID_RLC_DRAM: + return "RLC_DRAM"; + case AMDGPU_UCODE_ID_RLC_G: + return "RLC_G"; + case AMDGPU_UCODE_ID_STORAGE: + return "STORAGE"; + case AMDGPU_UCODE_ID_SMC: + return "SMC"; + case AMDGPU_UCODE_ID_UVD: + return "UVD"; + case 
AMDGPU_UCODE_ID_UVD1: + return "UVD1"; + case AMDGPU_UCODE_ID_VCE: + return "VCE"; + case AMDGPU_UCODE_ID_VCN: + return "VCN"; + case AMDGPU_UCODE_ID_VCN1: + return "VCN1"; + case AMDGPU_UCODE_ID_DMCU_ERAM: + return "DMCU_ERAM"; + case AMDGPU_UCODE_ID_DMCU_INTV: + return "DMCU_INTV"; + case AMDGPU_UCODE_ID_VCN0_RAM: + return "VCN0_RAM"; + case AMDGPU_UCODE_ID_VCN1_RAM: + return "VCN1_RAM"; + case AMDGPU_UCODE_ID_DMCUB: + return "DMCUB"; + default: + return "UNKNOWN UCODE"; + } +} + #define FW_VERSION_ATTR(name, mode, field) \ static ssize_t show_##name(struct device *dev, \ struct device_attribute *attr, \ @@ -438,10 +524,10 @@ FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); -FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); -FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); -FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_ras_ucode_version); -FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_xgmi_ucode_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd.fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras.feature_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.xgmi.feature_version); FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 2c42874f7784..7c2538db3cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -71,63 +71,76 @@ struct smc_firmware_header_v2_1 { uint32_t pptable_entry_offset; }; +struct psp_fw_legacy_bin_desc { + uint32_t fw_version; + uint32_t offset_bytes; + uint32_t size_bytes; +}; + /* version_major=1, version_minor=0 */ struct psp_firmware_header_v1_0 { struct common_firmware_header header; - uint32_t ucode_feature_version; - uint32_t sos_offset_bytes; - uint32_t sos_size_bytes; + struct psp_fw_legacy_bin_desc sos; }; /* version_major=1, version_minor=1 */ struct psp_firmware_header_v1_1 { struct psp_firmware_header_v1_0 v1_0; - uint32_t toc_header_version; - uint32_t toc_offset_bytes; - uint32_t toc_size_bytes; - uint32_t kdb_header_version; - uint32_t kdb_offset_bytes; - uint32_t kdb_size_bytes; + struct psp_fw_legacy_bin_desc toc; + struct psp_fw_legacy_bin_desc kdb; }; /* version_major=1, version_minor=2 */ struct psp_firmware_header_v1_2 { struct psp_firmware_header_v1_0 v1_0; - uint32_t reserve[3]; - uint32_t kdb_header_version; - uint32_t kdb_offset_bytes; - uint32_t kdb_size_bytes; + struct psp_fw_legacy_bin_desc res; + struct psp_fw_legacy_bin_desc kdb; }; /* version_major=1, version_minor=3 */ struct psp_firmware_header_v1_3 { struct psp_firmware_header_v1_1 v1_1; - uint32_t spl_header_version; - uint32_t spl_offset_bytes; - uint32_t spl_size_bytes; - uint32_t rl_header_version; - uint32_t rl_offset_bytes; - uint32_t rl_size_bytes; + struct psp_fw_legacy_bin_desc spl; + struct psp_fw_legacy_bin_desc rl; + struct psp_fw_legacy_bin_desc sys_drv_aux; + struct psp_fw_legacy_bin_desc sos_aux; +}; + +struct psp_fw_bin_desc { + uint32_t fw_type; + uint32_t fw_version; + uint32_t offset_bytes; + uint32_t size_bytes; +}; + +enum psp_fw_type { + PSP_FW_TYPE_UNKOWN, + 
PSP_FW_TYPE_PSP_SOS, + PSP_FW_TYPE_PSP_SYS_DRV, + PSP_FW_TYPE_PSP_KDB, + PSP_FW_TYPE_PSP_TOC, + PSP_FW_TYPE_PSP_SPL, + PSP_FW_TYPE_PSP_RL, + PSP_FW_TYPE_PSP_SOC_DRV, + PSP_FW_TYPE_PSP_INTF_DRV, + PSP_FW_TYPE_PSP_DBG_DRV, +}; + +/* version_major=2, version_minor=0 */ +struct psp_firmware_header_v2_0 { + struct common_firmware_header header; + uint32_t psp_fw_bin_count; + struct psp_fw_bin_desc psp_fw_bin[]; }; /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; - uint32_t ta_xgmi_ucode_version; - uint32_t ta_xgmi_offset_bytes; - uint32_t ta_xgmi_size_bytes; - uint32_t ta_ras_ucode_version; - uint32_t ta_ras_offset_bytes; - uint32_t ta_ras_size_bytes; - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_offset_bytes; - uint32_t ta_hdcp_size_bytes; - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_offset_bytes; - uint32_t ta_dtm_size_bytes; - uint32_t ta_securedisplay_ucode_version; - uint32_t ta_securedisplay_offset_bytes; - uint32_t ta_securedisplay_size_bytes; + struct psp_fw_legacy_bin_desc xgmi; + struct psp_fw_legacy_bin_desc ras; + struct psp_fw_legacy_bin_desc hdcp; + struct psp_fw_legacy_bin_desc dtm; + struct psp_fw_legacy_bin_desc securedisplay; }; enum ta_fw_type { @@ -142,18 +155,11 @@ enum ta_fw_type { TA_FW_TYPE_MAX_INDEX, }; -struct ta_fw_bin_desc { - uint32_t fw_type; - uint32_t fw_version; - uint32_t offset_bytes; - uint32_t size_bytes; -}; - /* version_major=2, version_minor=0 */ struct ta_firmware_header_v2_0 { struct common_firmware_header header; uint32_t ta_fw_bin_count; - struct ta_fw_bin_desc ta_fw_bin[]; + struct psp_fw_bin_desc ta_fw_bin[]; }; /* version_major=1, version_minor=0 */ @@ -316,6 +322,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_0 psp; struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; + struct psp_firmware_header_v2_0 psp_v2_0; struct ta_firmware_header_v1_0 ta; struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; @@ -330,7 +337,7 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; -#define UCODE_MAX_TA_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct ta_fw_bin_desc)) +#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) /* * fw loading support @@ -453,4 +460,6 @@ void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev); enum amdgpu_firmware_load_type amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); +const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index ea6f99be070b..a90029ee9733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -41,7 +41,6 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->umc.ras_if->sub_block_index = 0; - strcpy(adev->umc.ras_if->name, "umc"); } ih_info.head = fs_info.head = *adev->umc.ras_if; @@ -94,6 +93,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); if (adev->umc.ras_funcs && @@ -131,6 +131,9 @@ int 
amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, amdgpu_ras_add_bad_pages(adev, err_data->err_addr, err_data->err_addr_cnt); amdgpu_ras_save_bad_pages(adev); + + if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) + adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); } amdgpu_ras_reset_gpu(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index bbcccf53080d..e5a75fb788dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -22,6 +22,11 @@ #define __AMDGPU_UMC_H__ /* + * (addr / 256) * 4096, the higher 26 bits in ErrorAddr + * is the index of 4KB block + */ +#define ADDR_OF_4KB_BLOCK(addr) (((addr) & ~0xffULL) << 4) +/* * (addr / 256) * 8192, the higher 26 bits in ErrorAddr * is the index of 8KB block */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index c6dbc0801604..d451c359606a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <drm/drm.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_pm.h" @@ -325,7 +326,6 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; - cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { @@ -375,7 +375,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; - int i, j; + int i, j, idx; bool in_ras_intr = amdgpu_ras_intr_triggered(); cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -403,11 +403,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (!adev->uvd.inst[j].saved_bo) return -ENOMEM; - /* re-write 0 since err_event_athub will corrupt VCPU buffer */ - if (in_ras_intr) - memset(adev->uvd.inst[j].saved_bo, 0, size); - else - memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + if (drm_dev_enter(&adev->ddev, &idx)) { + /* re-write 0 since err_event_athub will corrupt VCPU buffer */ + if (in_ras_intr) + memset(adev->uvd.inst[j].saved_bo, 0, size); + else + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + + drm_dev_exit(idx); + } } if (in_ras_intr) @@ -420,7 +424,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; - int i; + int i, idx; for (i = 0; i < adev->uvd.num_uvd_inst; i++) { if (adev->uvd.harvest_config & (1 << i)) @@ -432,7 +436,10 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) ptr = adev->uvd.inst[i].cpu_addr; if (adev->uvd.inst[i].saved_bo != NULL) { - memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); + if (drm_dev_enter(&adev->ddev, &idx)) { + memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); + drm_dev_exit(idx); + } kvfree(adev->uvd.inst[i].saved_bo); adev->uvd.inst[i].saved_bo = NULL; } else { @@ -442,8 +449,11 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) hdr = (const struct common_firmware_header *)adev->uvd.fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); + if (drm_dev_enter(&adev->ddev, &idx)) { + memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + drm_dev_exit(idx); + } size -= le32_to_cpu(hdr->ucode_size_bytes); ptr += le32_to_cpu(hdr->ucode_size_bytes); } @@ -829,9 +839,8 @@ static int 
amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, default: DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); - return -EINVAL; } - BUG(); + return -EINVAL; } @@ -1115,9 +1124,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, - true, false, - msecs_to_jiffies(10)); + r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; if (r < 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ea6a62f67e38..8e8dee9fac9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <drm/drm.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_pm.h" @@ -87,7 +88,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence); /** - * amdgpu_vce_init - allocate memory, load vce firmware + * amdgpu_vce_sw_init - allocate memory, load vce firmware * * @adev: amdgpu_device pointer * @size: size for the new BO @@ -204,7 +205,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) } /** - * amdgpu_vce_fini - free memory + * amdgpu_vce_sw_fini - free memory * * @adev: amdgpu_device pointer * @@ -217,7 +218,6 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; - cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, @@ -293,7 +293,7 @@ int amdgpu_vce_resume(struct amdgpu_device *adev) void *cpu_addr; const struct common_firmware_header *hdr; unsigned offset; - int r; + int r, idx; if (adev->vce.vcpu_bo == NULL) return -EINVAL; @@ -313,8 +313,12 @@ int amdgpu_vce_resume(struct amdgpu_device *adev) hdr = (const struct common_firmware_header *)adev->vce.fw->data; offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(cpu_addr, adev->vce.fw->data + offset, - adev->vce.fw->size - offset); + + if (drm_dev_enter(&adev->ddev, &idx)) { + memcpy_toio(cpu_addr, adev->vce.fw->data + offset, + adev->vce.fw->size - offset); + drm_dev_exit(idx); + } amdgpu_bo_kunmap(adev->vce.vcpu_bo); @@ -574,7 +578,7 @@ err: } /** - * amdgpu_vce_cs_validate_bo - make sure not to cross 4GB boundary + * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary * * @p: parser context * @ib_idx: indirect buffer to use @@ -715,7 +719,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, } /** - * amdgpu_vce_cs_parse - parse and validate the command stream + * amdgpu_vce_ring_parse_cs - parse and validate the command stream * * @p: parser context * @ib_idx: indirect buffer to use @@ -951,7 +955,7 @@ out: } /** - * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode + * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode * * @p: parser context * @ib_idx: indirect buffer to use diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 201645963ba5..008a308a4eca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -27,6 +27,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include <linux/pci.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_pm.h" @@ -48,6 +49,8 @@ #define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin" #define 
FIRMWARE_DIMGREY_CAVEFISH "amdgpu/dimgrey_cavefish_vcn.bin" #define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin" +#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin" +#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -63,6 +66,8 @@ MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID); MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER); MODULE_FIRMWARE(FIRMWARE_VANGOGH); MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH); +MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY); +MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -151,6 +156,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case CHIP_BEIGE_GOBY: + fw_name = FIRMWARE_BEIGE_GOBY; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_YELLOW_CARP: + fw_name = FIRMWARE_YELLOW_CARP; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -241,8 +258,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { int i, j; - cancel_delayed_work_sync(&adev->vcn.idle_work); - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; @@ -271,11 +286,34 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } +bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) +{ + bool ret = false; + + int major; + int minor; + int revision; + + /* if cannot find IP data, then this VCN does not exist */ + if (amdgpu_discovery_get_vcn_version(adev, vcn_instance, &major, &minor, &revision) != 0) + return true; + + if ((type == VCN_ENCODE_RING) && (revision & VCN_BLOCK_ENCODE_DISABLE_MASK)) { + ret = true; + } else if ((type == VCN_DECODE_RING) && (revision & VCN_BLOCK_DECODE_DISABLE_MASK)) { + ret = true; + } else if ((type == VCN_UNIFIED_RING) && (revision & VCN_BLOCK_QUEUE_DISABLE_MASK)) { + ret = true; + } + + return ret; +} + int amdgpu_vcn_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; - int i; + int i, idx; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -292,7 +330,10 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) if (!adev->vcn.inst[i].saved_bo) return -ENOMEM; - memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + if (drm_dev_enter(&adev->ddev, &idx)) { + memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + drm_dev_exit(idx); + } } return 0; } @@ -301,7 +342,7 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; - int i; + int i, idx; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -313,7 +354,10 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) ptr = adev->vcn.inst[i].cpu_addr; if (adev->vcn.inst[i].saved_bo != NULL) { - memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + if (drm_dev_enter(&adev->ddev, &idx)) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + drm_dev_exit(idx); + } kvfree(adev->vcn.inst[i].saved_bo); adev->vcn.inst[i].saved_bo = NULL; } else { @@ -323,8 +367,11 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) hdr = (const struct common_firmware_header *)adev->vcn.fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - 
memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); + if (drm_dev_enter(&adev->ddev, &idx)) { + memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + drm_dev_exit(idx); + } size -= le32_to_cpu(hdr->ucode_size_bytes); ptr += le32_to_cpu(hdr->ucode_size_bytes); } @@ -367,7 +414,6 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) } if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) { - amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, @@ -387,7 +433,6 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) atomic_inc(&adev->vcn.total_submission_cnt); if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) { - amdgpu_gfx_off_ctrl(adev, false); r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, true); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index bc76cab67697..d74c62b49795 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -280,6 +280,16 @@ struct amdgpu_vcn_decode_buffer { uint32_t pad[30]; }; +#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 +#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 +#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 + +enum vcn_ring_type { + VCN_ENCODE_RING, + VCN_DECODE_RING, + VCN_UNIFIED_RING, +}; + int amdgpu_vcn_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); int amdgpu_vcn_suspend(struct amdgpu_device *adev); @@ -287,6 +297,9 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev); void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); +bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, + enum vcn_ring_type type, uint32_t vcn_instance); + int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0c9c5255aa42..ca058fbcccd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) struct drm_device *ddev = adev_to_drm(adev); /* enable virtual display */ - if (adev->mode_info.num_crtc == 0) - adev->mode_info.num_crtc = 1; - adev->enable_virtual_display = true; + if (adev->asic_type != CHIP_ALDEBARAN && + adev->asic_type != CHIP_ARCTURUS) { + if (adev->mode_info.num_crtc == 0) + adev->mode_info.num_crtc = 1; + adev->enable_virtual_display = true; + } ddev->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; @@ -429,6 +432,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) uint32_t checksum; uint32_t checkval; + uint32_t i; + uint32_t tmp; + if (adev->virt.fw_reserve.p_pf2vf == NULL) return -EINVAL; @@ -469,6 +475,29 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.reg_access = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->reg_access_flags.all; + adev->virt.decode_max_dimension_pixels = 0; + adev->virt.decode_max_frame_pixels = 0; + adev->virt.encode_max_dimension_pixels = 0; + adev->virt.encode_max_frame_pixels = 0; + 
adev->virt.is_mm_bw_enabled = false; + for (i = 0; i < AMD_SRIOV_MSG_RESERVE_VCN_INST; i++) { + tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_dimension_pixels; + adev->virt.decode_max_dimension_pixels = max(tmp, adev->virt.decode_max_dimension_pixels); + + tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_frame_pixels; + adev->virt.decode_max_frame_pixels = max(tmp, adev->virt.decode_max_frame_pixels); + + tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_dimension_pixels; + adev->virt.encode_max_dimension_pixels = max(tmp, adev->virt.encode_max_dimension_pixels); + + tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels; + adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels); + } + if((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0)) + adev->virt.is_mm_bw_enabled = true; + + adev->unique_id = + ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid; break; default: DRM_ERROR("invalid pf2vf version\n"); @@ -502,10 +531,10 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ta_ras_ucode_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.ta_xgmi_ucode_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ras.feature_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.xgmi.feature_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version); @@ -679,6 +708,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA20: case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: soc15_set_virt_ops(adev); break; case CHIP_NAVI10: @@ -740,3 +770,35 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad return mode; } + +void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, + struct amdgpu_video_codec_info *encode, uint32_t encode_array_size, + struct amdgpu_video_codec_info *decode, uint32_t decode_array_size) +{ + uint32_t i; + + if (!adev->virt.is_mm_bw_enabled) + return; + + if (encode) { + for (i = 0; i < encode_array_size; i++) { + encode[i].max_width = adev->virt.encode_max_dimension_pixels; + encode[i].max_pixels_per_frame = adev->virt.encode_max_frame_pixels; + if (encode[i].max_width > 0) + encode[i].max_height = encode[i].max_pixels_per_frame / encode[i].max_width; + else + encode[i].max_height = 0; + } + } + + if (decode) { + for (i = 0; i < decode_array_size; i++) { + decode[i].max_width = 
adev->virt.decode_max_dimension_pixels; + decode[i].max_pixels_per_frame = adev->virt.decode_max_frame_pixels; + if (decode[i].max_width > 0) + decode[i].max_height = decode[i].max_pixels_per_frame / decode[i].max_width; + else + decode[i].max_height = 0; + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 383d4bdc3fb5..8d4c20bb71c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -233,8 +233,17 @@ struct amdgpu_virt { /* vf2pf message */ struct delayed_work vf2pf_work; uint32_t vf2pf_update_interval_ms; + + /* multimedia bandwidth config */ + bool is_mm_bw_enabled; + uint32_t decode_max_dimension_pixels; + uint32_t decode_max_frame_pixels; + uint32_t encode_max_dimension_pixels; + uint32_t encode_max_frame_pixels; }; +struct amdgpu_video_codec_info; + #define amdgpu_sriov_enabled(adev) \ ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) @@ -307,4 +316,8 @@ int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev); + +void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, + struct amdgpu_video_codec_info *encode, uint32_t encode_array_size, + struct amdgpu_video_codec_info *decode, uint32_t decode_array_size); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c new file mode 100644 index 000000000000..ce982afeff91 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_simple_kms_helper.h> +#include <drm/drm_vblank.h> + +#include "amdgpu.h" +#ifdef CONFIG_DRM_AMDGPU_SI +#include "dce_v6_0.h" +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK +#include "dce_v8_0.h" +#endif +#include "dce_v10_0.h" +#include "dce_v11_0.h" +#include "ivsrcid/ivsrcid_vislands30.h" +#include "amdgpu_vkms.h" +#include "amdgpu_display.h" + +/** + * DOC: amdgpu_vkms + * + * The amdgpu vkms interface provides a virtual KMS interface for several use + * cases: devices without display hardware, platforms where the actual display + * hardware is not useful (e.g., servers), SR-IOV virtual functions, device + * emulation/simulation, and device bring up prior to display hardware being + * usable. We previously emulated a legacy KMS interface, but there was a desire + * to move to the atomic KMS interface. The vkms driver did everything we + * needed, but we wanted KMS support natively in the driver without buffer + * sharing and the ability to support an instance of VKMS per device. We first + * looked at splitting vkms into a stub driver and a helper module that other + * drivers could use to implement a virtual display, but this strategy ended up + * being messy due to driver specific callbacks needed for buffer management. + * Ultimately, it proved easier to import the vkms code as it mostly used core + * drm helpers anyway. 
+ */ + +static const u32 amdgpu_vkms_formats[] = { + DRM_FORMAT_XRGB8888, +}; + +static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer) +{ + struct amdgpu_vkms_output *output = container_of(timer, + struct amdgpu_vkms_output, + vblank_hrtimer); + struct drm_crtc *crtc = &output->crtc; + u64 ret_overrun; + bool ret; + + ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer, + output->period_ns); + WARN_ON(ret_overrun != 1); + + ret = drm_crtc_handle_vblank(crtc); + if (!ret) + DRM_ERROR("amdgpu_vkms failure on handling vblank"); + + return HRTIMER_RESTART; +} + +static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + unsigned int pipe = drm_crtc_index(crtc); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + + drm_calc_timestamping_constants(crtc, &crtc->mode); + + hrtimer_init(&out->vblank_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + out->vblank_hrtimer.function = &amdgpu_vkms_vblank_simulate; + out->period_ns = ktime_set(0, vblank->framedur_ns); + hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL); + + return 0; +} + +static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc) +{ + struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + + hrtimer_cancel(&out->vblank_hrtimer); +} + +static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, + int *max_error, + ktime_t *vblank_time, + bool in_vblank_irq) +{ + struct drm_device *dev = crtc->dev; + unsigned int pipe = crtc->index; + struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + + if (!READ_ONCE(vblank->enabled)) { + *vblank_time = ktime_get(); + return true; + } + + *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires); + + if (WARN_ON(*vblank_time == vblank->time)) + return true; + + /* + * To prevent races we roll the hrtimer forward before we do any + * interrupt processing - this is how real hw works (the interrupt is + * only generated after all the vblank registers are updated) and what + * the vblank core expects. Therefore we need to always correct the + * timestampe by one frame. 
+ */ + *vblank_time -= output->period_ns; + + return true; +} + +static const struct drm_crtc_funcs amdgpu_vkms_crtc_funcs = { + .set_config = drm_atomic_helper_set_config, + .destroy = drm_crtc_cleanup, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = amdgpu_vkms_enable_vblank, + .disable_vblank = amdgpu_vkms_disable_vblank, + .get_vblank_timestamp = amdgpu_vkms_get_vblank_timestamp, +}; + +static void amdgpu_vkms_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + drm_crtc_vblank_on(crtc); +} + +static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + drm_crtc_vblank_off(crtc); +} + +static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + if (crtc->state->event) { + spin_lock(&crtc->dev->event_lock); + + if (drm_crtc_vblank_get(crtc) != 0) + drm_crtc_send_vblank_event(crtc, crtc->state->event); + else + drm_crtc_arm_vblank_event(crtc, crtc->state->event); + + spin_unlock(&crtc->dev->event_lock); + + crtc->state->event = NULL; + } +} + +static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = { + .atomic_flush = amdgpu_vkms_crtc_atomic_flush, + .atomic_enable = amdgpu_vkms_crtc_atomic_enable, + .atomic_disable = amdgpu_vkms_crtc_atomic_disable, +}; + +static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, + struct drm_plane *primary, struct drm_plane *cursor) +{ + int ret; + + ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor, + &amdgpu_vkms_crtc_funcs, NULL); + if (ret) { + DRM_ERROR("Failed to init CRTC\n"); + return ret; + } + + drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs); + + return ret; +} + +static const struct drm_connector_funcs amdgpu_vkms_connector_funcs = { + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_display_mode *mode = NULL; + unsigned i; + static const struct mode_size { + int w; + int h; + } common_modes[] = { + { 640, 480}, + { 720, 480}, + { 800, 600}, + { 848, 480}, + {1024, 768}, + {1152, 768}, + {1280, 720}, + {1280, 800}, + {1280, 854}, + {1280, 960}, + {1280, 1024}, + {1440, 900}, + {1400, 1050}, + {1680, 1050}, + {1600, 1200}, + {1920, 1080}, + {1920, 1200}, + {2560, 1440}, + {4096, 3112}, + {3656, 2664}, + {3840, 2160}, + {4096, 2160}, + }; + + for (i = 0; i < ARRAY_SIZE(common_modes); i++) { + mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + drm_mode_probed_add(connector, mode); + } + + drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF); + + return ARRAY_SIZE(common_modes); +} + +static const struct drm_connector_helper_funcs amdgpu_vkms_conn_helper_funcs = { + .get_modes = amdgpu_vkms_conn_get_modes, +}; + +static const struct drm_plane_funcs amdgpu_vkms_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = 
drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +}; + +static void amdgpu_vkms_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *old_state) +{ + return; +} + +static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct drm_crtc_state *crtc_state; + int ret; + + if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc)) + return 0; + + crtc_state = drm_atomic_get_crtc_state(state, + new_plane_state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, true); + if (ret != 0) + return ret; + + /* for now primary plane must be visible and full screen */ + if (!new_plane_state->visible) + return -EINVAL; + + return 0; +} + +static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct amdgpu_framebuffer *afb; + struct drm_gem_object *obj; + struct amdgpu_device *adev; + struct amdgpu_bo *rbo; + struct list_head list; + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + uint32_t domain; + int r; + + if (!new_state->fb) { + DRM_DEBUG_KMS("No FB bound\n"); + return 0; + } + afb = to_amdgpu_framebuffer(new_state->fb); + obj = new_state->fb->obj[0]; + rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); + INIT_LIST_HEAD(&list); + + tv.bo = &rbo->tbo; + tv.num_shared = 1; + list_add(&tv.head, &list); + + r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); + if (r) { + dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + return r; + } + + if (plane->type != DRM_PLANE_TYPE_CURSOR) + domain = amdgpu_display_supported_domains(adev, rbo->flags); + else + domain = AMDGPU_GEM_DOMAIN_VRAM; + + r = amdgpu_bo_pin(rbo, domain); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to pin framebuffer with error %d\n", r); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + + r = amdgpu_ttm_alloc_gart(&rbo->tbo); + if (unlikely(r != 0)) { + amdgpu_bo_unpin(rbo); + ttm_eu_backoff_reservation(&ticket, &list); + DRM_ERROR("%p bind failed\n", rbo); + return r; + } + + ttm_eu_backoff_reservation(&ticket, &list); + + afb->address = amdgpu_bo_gpu_offset(rbo); + + amdgpu_bo_ref(rbo); + + return 0; +} + +static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct amdgpu_bo *rbo; + int r; + + if (!old_state->fb) + return; + + rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) { + DRM_ERROR("failed to reserve rbo before unpin\n"); + return; + } + + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + amdgpu_bo_unref(&rbo); +} + +static const struct drm_plane_helper_funcs amdgpu_vkms_primary_helper_funcs = { + .atomic_update = amdgpu_vkms_plane_atomic_update, + .atomic_check = amdgpu_vkms_plane_atomic_check, + .prepare_fb = amdgpu_vkms_prepare_fb, + .cleanup_fb = amdgpu_vkms_cleanup_fb, +}; + +static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, + enum drm_plane_type type, + int index) +{ + struct drm_plane *plane; + int ret; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + ret = drm_universal_plane_init(dev, plane, 1 << index, + &amdgpu_vkms_plane_funcs, + amdgpu_vkms_formats, 
+ ARRAY_SIZE(amdgpu_vkms_formats), + NULL, type, NULL); + if (ret) { + kfree(plane); + return ERR_PTR(ret); + } + + drm_plane_helper_add(plane, &amdgpu_vkms_primary_helper_funcs); + + return plane; +} + +int amdgpu_vkms_output_init(struct drm_device *dev, + struct amdgpu_vkms_output *output, int index) +{ + struct drm_connector *connector = &output->connector; + struct drm_encoder *encoder = &output->encoder; + struct drm_crtc *crtc = &output->crtc; + struct drm_plane *primary, *cursor = NULL; + int ret; + + primary = amdgpu_vkms_plane_init(dev, DRM_PLANE_TYPE_PRIMARY, index); + if (IS_ERR(primary)) + return PTR_ERR(primary); + + ret = amdgpu_vkms_crtc_init(dev, crtc, primary, cursor); + if (ret) + goto err_crtc; + + ret = drm_connector_init(dev, connector, &amdgpu_vkms_connector_funcs, + DRM_MODE_CONNECTOR_VIRTUAL); + if (ret) { + DRM_ERROR("Failed to init connector\n"); + goto err_connector; + } + + drm_connector_helper_add(connector, &amdgpu_vkms_conn_helper_funcs); + + ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL); + if (ret) { + DRM_ERROR("Failed to init encoder\n"); + goto err_encoder; + } + encoder->possible_crtcs = 1 << index; + + ret = drm_connector_attach_encoder(connector, encoder); + if (ret) { + DRM_ERROR("Failed to attach connector to encoder\n"); + goto err_attach; + } + + drm_mode_config_reset(dev); + + return 0; + +err_attach: + drm_encoder_cleanup(encoder); + +err_encoder: + drm_connector_cleanup(connector); + +err_connector: + drm_crtc_cleanup(crtc); + +err_crtc: + drm_plane_cleanup(primary); + + return ret; +} + +const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = { + .fb_create = amdgpu_display_user_framebuffer_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static int amdgpu_vkms_sw_init(void *handle) +{ + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev_to_drm(adev)->max_vblank_count = 0; + + adev_to_drm(adev)->mode_config.funcs = &amdgpu_vkms_mode_funcs; + + adev_to_drm(adev)->mode_config.max_width = XRES_MAX; + adev_to_drm(adev)->mode_config.max_height = YRES_MAX; + + adev_to_drm(adev)->mode_config.preferred_depth = 24; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; + + adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; + + r = amdgpu_display_modeset_create_props(adev); + if (r) + return r; + + adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, sizeof(struct amdgpu_vkms_output), GFP_KERNEL); + if (!adev->amdgpu_vkms_output) + return -ENOMEM; + + /* allocate crtcs, encoders, connectors */ + for (i = 0; i < adev->mode_info.num_crtc; i++) { + r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i); + if (r) + return r; + } + + drm_kms_helper_poll_init(adev_to_drm(adev)); + + adev->mode_info.mode_config_initialized = true; + return 0; +} + +static int amdgpu_vkms_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i = 0; + + for (i = 0; i < adev->mode_info.num_crtc; i++) + if (adev->mode_info.crtcs[i]) + hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); + + kfree(adev->mode_info.bios_hardcoded_edid); + kfree(adev->amdgpu_vkms_output); + + drm_kms_helper_poll_fini(adev_to_drm(adev)); + + adev->mode_info.mode_config_initialized = false; + return 0; +} + +static int amdgpu_vkms_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case 
CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + dce_v6_0_disable_dce(adev); + break; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + dce_v8_0_disable_dce(adev); + break; +#endif + case CHIP_FIJI: + case CHIP_TONGA: + dce_v10_0_disable_dce(adev); + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_VEGAM: + dce_v11_0_disable_dce(adev); + break; + case CHIP_TOPAZ: +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + /* no DCE */ + break; + default: + break; + } + return 0; +} + +static int amdgpu_vkms_hw_fini(void *handle) +{ + return 0; +} + +static int amdgpu_vkms_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = drm_mode_config_helper_suspend(adev_to_drm(adev)); + if (r) + return r; + return amdgpu_vkms_hw_fini(handle); +} + +static int amdgpu_vkms_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_vkms_hw_init(handle); + if (r) + return r; + return drm_mode_config_helper_resume(adev_to_drm(adev)); +} + +static bool amdgpu_vkms_is_idle(void *handle) +{ + return true; +} + +static int amdgpu_vkms_wait_for_idle(void *handle) +{ + return 0; +} + +static int amdgpu_vkms_soft_reset(void *handle) +{ + return 0; +} + +static int amdgpu_vkms_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int amdgpu_vkms_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { + .name = "amdgpu_vkms", + .early_init = NULL, + .late_init = NULL, + .sw_init = amdgpu_vkms_sw_init, + .sw_fini = amdgpu_vkms_sw_fini, + .hw_init = amdgpu_vkms_hw_init, + .hw_fini = amdgpu_vkms_hw_fini, + .suspend = amdgpu_vkms_suspend, + .resume = amdgpu_vkms_resume, + .is_idle = amdgpu_vkms_is_idle, + .wait_for_idle = amdgpu_vkms_wait_for_idle, + .soft_reset = amdgpu_vkms_soft_reset, + .set_clockgating_state = amdgpu_vkms_set_clockgating_state, + .set_powergating_state = amdgpu_vkms_set_powergating_state, +}; + +const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_vkms_ip_funcs, +}; + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h new file mode 100644 index 000000000000..97f1b79c0724 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _AMDGPU_VKMS_H_ +#define _AMDGPU_VKMS_H_ + +#define XRES_DEF 1024 +#define YRES_DEF 768 + +#define XRES_MAX 16384 +#define YRES_MAX 16384 + +#define drm_crtc_to_amdgpu_vkms_output(target) \ + container_of(target, struct amdgpu_vkms_output, crtc) + +extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block; + +struct amdgpu_vkms_output { + struct drm_crtc crtc; + struct drm_encoder encoder; + struct drm_connector connector; + struct hrtimer vblank_hrtimer; + ktime_t period_ns; + struct drm_pending_vblank_event *event; +}; + +#endif /* _AMDGPU_VKMS_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9acee4a5b2ba..6b15cad78de9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -25,18 +25,22 @@ * Alex Deucher * Jerome Glisse */ + #include <linux/dma-fence-array.h> 
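The amdgpu_vkms code above exposes the virtual display as a regular IP block (amdgpu_vkms_ip_block, type AMD_IP_BLOCK_TYPE_DCE), so ASIC init paths can register it in place of a hardware display block. A minimal sketch of how such wiring might look when virtual display is requested; the helper name and the surrounding check are illustrative, not part of this patch:

/* Illustrative only: add the virtual KMS display block during IP setup. */
static int example_add_display_ip(struct amdgpu_device *adev)
{
        /* e.g. SR-IOV or amdgpu.virtual_display forces a virtual display */
        if (adev->enable_virtual_display)
                return amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);

        /* otherwise a real DCE/DM IP block would be added here */
        return 0;
}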
#include <linux/interval_tree_generic.h> #include <linux/idr.h> #include <linux/dma-buf.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_gmc.h" #include "amdgpu_xgmi.h" #include "amdgpu_dma_buf.h" +#include "amdgpu_res_cursor.h" +#include "kfd_svm.h" /** * DOC: GPUVM @@ -84,6 +88,46 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +/** + * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm pointer + * @pasid: the pasid the VM is using on this GPU + * + * Set the pasid this VM is using on this GPU, can also be used to remove the + * pasid by passing in zero. + * + */ +int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, + u32 pasid) +{ + int r; + + if (vm->pasid == pasid) + return 0; + + if (vm->pasid) { + r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid)); + if (r < 0) + return r; + + vm->pasid = 0; + } + + if (pasid) { + r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, + GFP_KERNEL)); + if (r < 0) + return r; + + vm->pasid = pasid; + } + + + return 0; +} + /* * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS * happens while holding this lock anywhere to prevent deadlocks when @@ -328,7 +372,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; - if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) + if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) return; vm->bulk_moveable = false; @@ -338,7 +382,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, amdgpu_vm_bo_idle(base); if (bo->preferred_domains & - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)) return; /* @@ -357,14 +401,14 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, * Helper to get the parent entry for the child page table. NULL if we are at * the root page directory. 
*/ -static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) +static struct amdgpu_vm_bo_base *amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) { - struct amdgpu_bo *parent = pt->base.bo->parent; + struct amdgpu_bo *parent = pt->bo->parent; if (!parent) return NULL; - return container_of(parent->vm_bo, struct amdgpu_vm_pt, base); + return parent->vm_bo; } /* @@ -372,8 +416,8 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) */ struct amdgpu_vm_pt_cursor { uint64_t pfn; - struct amdgpu_vm_pt *parent; - struct amdgpu_vm_pt *entry; + struct amdgpu_vm_bo_base *parent; + struct amdgpu_vm_bo_base *entry; unsigned level; }; @@ -412,17 +456,17 @@ static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, { unsigned mask, shift, idx; - if (!cursor->entry->entries) + if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || + !cursor->entry->bo) return false; - BUG_ON(!cursor->entry->base.bo); mask = amdgpu_vm_entries_mask(adev, cursor->level); shift = amdgpu_vm_level_shift(adev, cursor->level); ++cursor->level; idx = (cursor->pfn >> shift) & mask; cursor->parent = cursor->entry; - cursor->entry = &cursor->entry->entries[idx]; + cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; return true; } @@ -449,7 +493,7 @@ static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, shift = amdgpu_vm_level_shift(adev, cursor->level - 1); num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); - if (cursor->entry == &cursor->parent->entries[num_entries - 1]) + if (cursor->entry == &to_amdgpu_bo_vm(cursor->parent->bo)->entries[num_entries - 1]) return false; cursor->pfn += 1ULL << shift; @@ -535,7 +579,7 @@ static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, * True when the search should continue, false otherwise. 
*/ static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_pt *entry) + struct amdgpu_vm_bo_base *entry) { return entry && (!start || entry != start->entry); } @@ -586,7 +630,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct amdgpu_bo_list_entry *entry) { entry->priority = 0; - entry->tv.bo = &vm->root.base.bo->tbo; + entry->tv.bo = &vm->root.bo->tbo; /* Two for VM updates, one for TTM and one for the CS job */ entry->tv.num_shared = 4; entry->user_pages = NULL; @@ -618,7 +662,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) + if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv) vm->bulk_moveable = false; } @@ -649,15 +693,16 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo; + struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); if (!bo->parent) continue; - ttm_bo_move_to_lru_tail(&bo->tbo, &bo->tbo.mem, + ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource, &vm->lru_bulk_move); - if (bo->shadow) - ttm_bo_move_to_lru_tail(&bo->shadow->tbo, - &bo->shadow->tbo.mem, + if (shadow) + ttm_bo_move_to_lru_tail(&shadow->tbo, + shadow->tbo.resource, &vm->lru_bulk_move); } spin_unlock(&adev->mman.bdev.lru_lock); @@ -689,15 +734,21 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { struct amdgpu_bo *bo = bo_base->bo; + struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); r = validate(param, bo); if (r) return r; + if (shadow) { + r = validate(param, shadow); + if (r) + return r; + } if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); } else { - vm->update_funcs->map_table(bo); + vm->update_funcs->map_table(to_amdgpu_bo_vm(bo)); amdgpu_vm_bo_relocated(bo_base); } } @@ -729,7 +780,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * * @adev: amdgpu_device pointer * @vm: VM to clear BO from - * @bo: BO to clear + * @vmbo: BO to clear * @immediate: use an immediate update * * Root PD needs to be reserved when calling this. 
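amdgpu_vm_validate_pt_bos() above now also runs the caller-supplied validate() callback on a page table's GTT shadow (obtained via amdgpu_bo_shadowed()) and hands kernel BOs to map_table() as struct amdgpu_bo_vm. A minimal sketch of a validate() callback matching that contract, assuming the usual pattern of re-applying the BO's allowed domains and letting TTM revalidate the placement (names are illustrative):

/* Illustrative validate() callback for amdgpu_vm_validate_pt_bos(). */
static int example_vm_validate(void *param, struct amdgpu_bo *bo)
{
        struct ttm_operation_ctx ctx = { .interruptible = true };

        /* re-apply the domains this BO may live in ... */
        amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);

        /* ... and let TTM move/validate it there */
        return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}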
@@ -739,13 +790,14 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo, + struct amdgpu_bo_vm *vmbo, bool immediate) { struct ttm_operation_ctx ctx = { true, false }; unsigned level = adev->vm_manager.root_level; struct amdgpu_vm_update_params params; - struct amdgpu_bo *ancestor = bo; + struct amdgpu_bo *ancestor = &vmbo->bo; + struct amdgpu_bo *bo = &vmbo->bo; unsigned entries, ats_entries; uint64_t addr; int r; @@ -769,11 +821,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, entries -= ats_entries; } else { - struct amdgpu_vm_pt *pt; + struct amdgpu_vm_bo_base *pt; - pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt, base); + pt = ancestor->vm_bo; ats_entries = amdgpu_vm_num_ats_entries(adev); - if ((pt - vm->root.entries) >= ats_entries) { + if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= ats_entries) { ats_entries = 0; } else { ats_entries = entries; @@ -785,14 +837,15 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) return r; - if (bo->shadow) { - r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement, - &ctx); + if (vmbo->shadow) { + struct amdgpu_bo *shadow = vmbo->shadow; + + r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); if (r) return r; } - r = vm->update_funcs->map_table(bo); + r = vm->update_funcs->map_table(vmbo); if (r) return r; @@ -816,7 +869,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); } - r = vm->update_funcs->update(¶ms, bo, addr, 0, ats_entries, + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries, value, flags); if (r) return r; @@ -839,7 +892,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, } } - r = vm->update_funcs->update(¶ms, bo, addr, 0, entries, + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, value, flags); if (r) return r; @@ -849,35 +902,85 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, } /** - * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation + * amdgpu_vm_pt_create - create bo for PD/PT * * @adev: amdgpu_device pointer * @vm: requesting vm * @level: the page table level * @immediate: use a immediate update - * @bp: resulting BO allocation parameters + * @vmbo: pointer to the buffer object pointer */ -static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, +static int amdgpu_vm_pt_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, int level, bool immediate, - struct amdgpu_bo_param *bp) + struct amdgpu_bo_vm **vmbo) { - memset(bp, 0, sizeof(*bp)); + struct amdgpu_bo_param bp; + struct amdgpu_bo *bo; + struct dma_resv *resv; + unsigned int num_entries; + int r; + + memset(&bp, 0, sizeof(bp)); - bp->size = amdgpu_vm_bo_size(adev, level); - bp->byte_align = AMDGPU_GPU_PAGE_SIZE; - bp->domain = AMDGPU_GEM_DOMAIN_VRAM; - bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain); - bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + bp.size = amdgpu_vm_bo_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp->bo_ptr_size = sizeof(struct amdgpu_bo); + + if (level < AMDGPU_VM_PTB) + num_entries = amdgpu_vm_num_entries(adev, level); + else + num_entries = 0; + + bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); + if 
(vm->use_cpu_for_update) - bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else if (!vm->root.base.bo || vm->root.base.bo->shadow) - bp->flags |= AMDGPU_GEM_CREATE_SHADOW; - bp->type = ttm_bo_type_kernel; - bp->no_wait_gpu = immediate; - if (vm->root.base.bo) - bp->resv = vm->root.base.bo->tbo.base.resv; + bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + bp.type = ttm_bo_type_kernel; + bp.no_wait_gpu = immediate; + if (vm->root.bo) + bp.resv = vm->root.bo->tbo.base.resv; + + r = amdgpu_bo_create_vm(adev, &bp, vmbo); + if (r) + return r; + + bo = &(*vmbo)->bo; + if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { + (*vmbo)->shadow = NULL; + return 0; + } + + if (!bp.resv) + WARN_ON(dma_resv_lock(bo->tbo.base.resv, + NULL)); + resv = bp.resv; + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_bo_size(adev, level); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.base.resv; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); + + if (!resv) + dma_resv_unlock(bo->tbo.base.resv); + + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); + amdgpu_bo_add_to_shadow_list(*vmbo); + + return 0; } /** @@ -899,37 +1002,24 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm_pt_cursor *cursor, bool immediate) { - struct amdgpu_vm_pt *entry = cursor->entry; - struct amdgpu_bo_param bp; - struct amdgpu_bo *pt; + struct amdgpu_vm_bo_base *entry = cursor->entry; + struct amdgpu_bo *pt_bo; + struct amdgpu_bo_vm *pt; int r; - if (cursor->level < AMDGPU_VM_PTB && !entry->entries) { - unsigned num_entries; - - num_entries = amdgpu_vm_num_entries(adev, cursor->level); - entry->entries = kvmalloc_array(num_entries, - sizeof(*entry->entries), - GFP_KERNEL | __GFP_ZERO); - if (!entry->entries) - return -ENOMEM; - } - - if (entry->base.bo) + if (entry->bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp); - - r = amdgpu_bo_create(adev, &bp, &pt); + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); if (r) return r; /* Keep a reference to the root directory to avoid * freeing them up in the wrong order. 
*/ - pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); - amdgpu_vm_bo_base_init(&entry->base, vm, pt); - + pt_bo = &pt->bo; + pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); + amdgpu_vm_bo_base_init(entry, vm, pt_bo); r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); if (r) goto error_free_pt; @@ -938,7 +1028,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, error_free_pt: amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt); + amdgpu_bo_unref(&pt_bo); return r; } @@ -947,16 +1037,17 @@ error_free_pt: * * @entry: PDE to free */ -static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry) +static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry) { - if (entry->base.bo) { - entry->base.bo->vm_bo = NULL; - list_del(&entry->base.vm_status); - amdgpu_bo_unref(&entry->base.bo->shadow); - amdgpu_bo_unref(&entry->base.bo); - } - kvfree(entry->entries); - entry->entries = NULL; + struct amdgpu_bo *shadow; + + if (!entry->bo) + return; + shadow = amdgpu_bo_shadowed(entry->bo); + entry->bo->vm_bo = NULL; + list_del(&entry->vm_status); + amdgpu_bo_unref(&shadow); + amdgpu_bo_unref(&entry->bo); } /** @@ -973,7 +1064,7 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev, struct amdgpu_vm_pt_cursor *start) { struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_pt *entry; + struct amdgpu_vm_bo_base *entry; vm->bulk_moveable = false; @@ -1127,7 +1218,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (vm_flush_needed || pasid_mapping_needed) { - r = amdgpu_fence_emit(ring, &fence, 0); + r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; } @@ -1241,10 +1332,10 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) */ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, struct amdgpu_vm *vm, - struct amdgpu_vm_pt *entry) + struct amdgpu_vm_bo_base *entry) { - struct amdgpu_vm_pt *parent = amdgpu_vm_pt_parent(entry); - struct amdgpu_bo *bo = parent->base.bo, *pbo; + struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); + struct amdgpu_bo *bo = parent->bo, *pbo; uint64_t pde, pt, flags; unsigned level; @@ -1252,9 +1343,10 @@ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, pbo = pbo->parent; level += params->adev->vm_manager.root_level; - amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags); - pde = (entry - parent->entries) * 8; - return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags); + amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); + pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; + return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, + 1, 0, flags); } /** @@ -1269,11 +1361,11 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_pt *entry; + struct amdgpu_vm_bo_base *entry; for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) - if (entry->base.bo && !entry->base.moved) - amdgpu_vm_bo_relocated(&entry->base); + if (entry->bo && !entry->moved) + amdgpu_vm_bo_relocated(entry); } /** @@ -1307,11 +1399,12 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, return r; while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_pt *entry; + struct amdgpu_vm_bo_base *entry; - entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt, - base.vm_status); - amdgpu_vm_bo_idle(&entry->base); + entry = list_first_entry(&vm->relocated, + struct 
amdgpu_vm_bo_base, + vm_status); + amdgpu_vm_bo_idle(entry); r = amdgpu_vm_update_pde(¶ms, vm, entry); if (r) @@ -1334,9 +1427,9 @@ error: * Make sure to set the right flags for the PTEs at the desired level. */ static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, - struct amdgpu_bo *bo, unsigned level, + struct amdgpu_bo_vm *pt, unsigned int level, uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, + unsigned int count, uint32_t incr, uint64_t flags) { @@ -1352,7 +1445,7 @@ static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } - params->vm->update_funcs->update(params, bo, pe, addr, count, incr, + params->vm->update_funcs->update(params, pt, pe, addr, count, incr, flags); } @@ -1491,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, continue; } - pt = cursor.entry->base.bo; + pt = cursor.entry->bo; if (!pt) { /* We need all PDs and PTs for mapping something, */ if (flags & AMDGPU_PTE_VALID) @@ -1503,7 +1596,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, if (!amdgpu_vm_pt_ancestor(&cursor)) return -EINVAL; - pt = cursor.entry->base.bo; + pt = cursor.entry->bo; shift = parent_shift; frag_end = max(frag_end, ALIGN(frag_start + 1, 1ULL << shift)); @@ -1532,9 +1625,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, nptes, dst, incr, upd_flags, vm->task_info.pid, vm->immediate.fence_context); - amdgpu_vm_update_flags(params, pt, cursor.level, - pe_start, dst, nptes, incr, - upd_flags); + amdgpu_vm_update_flags(params, to_amdgpu_bo_vm(pt), + cursor.level, pe_start, dst, + nptes, incr, upd_flags); pe_start += nptes * 8; dst += nptes * incr; @@ -1557,7 +1650,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * completely covered by the range and so potentially still in use. */ while (cursor.pfn < frag_start) { - amdgpu_vm_free_pts(adev, params->vm, &cursor); + /* Make sure previous mapping is freed */ + if (cursor.entry->bo) { + params->table_freed = true; + amdgpu_vm_free_pts(adev, params->vm, &cursor); + } amdgpu_vm_pt_next(adev, &cursor); } @@ -1583,29 +1680,34 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr - * @nodes: array of drm_mm_nodes with the MC addresses + * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence + * @table_freed: return true if page table is freed * * Fill in the page table entries between @start and @last. * * Returns: * 0 for success, -EINVAL for failure. 
*/ -static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct drm_mm_node *nodes, - dma_addr_t *pages_addr, - struct dma_fence **fence) +int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev, + struct amdgpu_vm *vm, bool immediate, + bool unlocked, struct dma_resv *resv, + uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, + struct ttm_resource *res, + dma_addr_t *pages_addr, + struct dma_fence **fence, + bool *table_freed) { struct amdgpu_vm_update_params params; + struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; - uint64_t pfn; - int r; + int r, idx; + + if (!drm_dev_enter(&adev->ddev, &idx)) + return -ENODEV; memset(¶ms, 0, sizeof(params)); params.adev = adev; @@ -1622,14 +1724,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, else sync_mode = AMDGPU_SYNC_EXPLICIT; - pfn = offset >> PAGE_SHIFT; - if (nodes) { - while (pfn >= nodes->size) { - pfn -= nodes->size; - ++nodes; - } - } - amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; @@ -1639,7 +1733,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { struct dma_fence *tmp = dma_fence_get_stub(); - amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true); + amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true); swap(vm->last_unlocked, tmp); dma_fence_put(tmp); } @@ -1648,23 +1742,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_unlock; - do { + amdgpu_res_first(pages_addr ? NULL : res, offset, + (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor); + while (cursor.remaining) { uint64_t tmp, num_entries, addr; - - num_entries = last - start + 1; - if (nodes) { - addr = nodes->start << PAGE_SHIFT; - num_entries = min((nodes->size - pfn) * - AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries); - } else { - addr = 0; - } - + num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT; if (pages_addr) { bool contiguous = true; if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { + uint64_t pfn = cursor.start >> PAGE_SHIFT; uint64_t count; contiguous = pages_addr[pfn + 1] == @@ -1684,16 +1772,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } if (!contiguous) { - addr = pfn << PAGE_SHIFT; + addr = cursor.start; params.pages_addr = pages_addr; } else { - addr = pages_addr[pfn]; + addr = pages_addr[cursor.start >> PAGE_SHIFT]; params.pages_addr = NULL; } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr += bo_adev->vm_manager.vram_base_offset; - addr += pfn << PAGE_SHIFT; + addr = bo_adev->vm_manager.vram_base_offset + + cursor.start; + } else { + addr = 0; } tmp = start + num_entries; @@ -1701,28 +1791,72 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_unlock; - pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; - if (nodes && nodes->size == pfn) { - pfn = 0; - ++nodes; - } + amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); start = tmp; - - } while (unlikely(start != last + 1)); + } r = vm->update_funcs->commit(¶ms, fence); + if (table_freed) + *table_freed = *table_freed || params.table_freed; + error_unlock: amdgpu_vm_eviction_unlock(vm); + drm_dev_exit(idx); return r; } +void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, + uint64_t 
*gtt_mem, uint64_t *cpu_mem) +{ + struct amdgpu_bo_va *bo_va, *tmp; + + list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { + if (!bo_va->base.bo) + continue; + amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, + gtt_mem, cpu_mem); + } + list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) { + if (!bo_va->base.bo) + continue; + amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, + gtt_mem, cpu_mem); + } + list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) { + if (!bo_va->base.bo) + continue; + amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, + gtt_mem, cpu_mem); + } + list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { + if (!bo_va->base.bo) + continue; + amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, + gtt_mem, cpu_mem); + } + spin_lock(&vm->invalidated_lock); + list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { + if (!bo_va->base.bo) + continue; + amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, + gtt_mem, cpu_mem); + } + list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) { + if (!bo_va->base.bo) + continue; + amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, + gtt_mem, cpu_mem); + } + spin_unlock(&vm->invalidated_lock); +} /** * amdgpu_vm_bo_update - update all BO mappings in the vm page table * * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object * @clear: if true clear the entries + * @table_freed: return true if page table is freed * * Fill in the page table entries for @bo_va. * @@ -1730,14 +1864,13 @@ error_unlock: * 0 for success, -EINVAL for failure. */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear) + bool clear, bool *table_freed) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; - struct drm_mm_node *nodes; struct dma_fence **last_update; struct dma_resv *resv; uint64_t flags; @@ -1746,8 +1879,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (clear || !bo) { mem = NULL; - nodes = NULL; - resv = vm->root.base.bo->tbo.base.resv; + resv = vm->root.bo->tbo.base.resv; } else { struct drm_gem_object *obj = &bo->tbo.base; @@ -1757,12 +1889,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - if (abo->tbo.mem.mem_type == TTM_PL_VRAM) + if (abo->tbo.resource->mem_type == TTM_PL_VRAM) bo = gem_to_amdgpu_bo(gobj); } - mem = &bo->tbo.mem; - nodes = mem->mm_node; - if (mem->mem_type == TTM_PL_TT) + mem = bo->tbo.resource; + if (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT) pages_addr = bo->tbo.ttm->dma_address; } @@ -1778,7 +1910,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } if (clear || (bo && bo->tbo.base.resv == - vm->root.base.bo->tbo.base.resv)) + vm->root.bo->tbo.base.resv)) last_update = &vm->last_update; else last_update = &bo_va->last_pt_update; @@ -1810,8 +1942,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, resv, mapping->start, mapping->last, update_flags, - mapping->offset, nodes, - pages_addr, last_update); + mapping->offset, mem, + pages_addr, last_update, table_freed); if (r) return r; } @@ -1820,8 +1952,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va 
*bo_va, * the evicted list so that it gets validated again on the * next command submission. */ - if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { - uint32_t mem_type = bo->tbo.mem.mem_type; + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { + uint32_t mem_type = bo->tbo.resource->mem_type; if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) @@ -1957,18 +2089,17 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, */ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; + struct dma_resv *resv = vm->root.bo->tbo.base.resv; struct dma_fence *excl, **shared; unsigned i, shared_count; int r; - r = dma_resv_get_fences_rcu(resv, &excl, - &shared_count, &shared); + r = dma_resv_get_fences(resv, &excl, &shared_count, &shared); if (r) { /* Not enough memory to grab the fence list, as last resort * block for all the fences to complete. */ - dma_resv_wait_timeout_rcu(resv, true, false, + dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); return; } @@ -2004,7 +2135,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence) { - struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; + struct dma_resv *resv = vm->root.bo->tbo.base.resv; struct amdgpu_bo_va_mapping *mapping; uint64_t init_pte_value = 0; struct dma_fence *f = NULL; @@ -2022,7 +2153,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, resv, mapping->start, mapping->last, init_pte_value, - 0, NULL, NULL, &f); + 0, NULL, NULL, &f, NULL); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2064,7 +2195,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; } @@ -2083,7 +2214,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear); + r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); if (r) return r; @@ -2163,7 +2294,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (mapping->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv && + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && !bo_va->base.moved) { list_move(&bo_va->base.vm_status, &vm->moved); } @@ -2525,7 +2656,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_vm_bo_base **base; if (bo) { - if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) + if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) vm->bulk_moveable = false; for (base = &bo_va->base.bo->vm_bo; *base; @@ -2580,7 +2711,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) return false; /* Try to block ongoing updates */ @@ -2613,13 +2744,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base; /* shadow bo doesn't have bo base, its validation needs its parent */ - if (bo->parent && bo->parent->shadow == bo) + if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo)) bo = bo->parent; 
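amdgpu_vm_bo_update() and amdgpu_vm_bo_update_mapping() now take an optional table_freed out-parameter so a caller can react (for example by flushing TLBs) when page-table pages were freed during the update; callers that do not care pass NULL, as amdgpu_vm_handle_moved() does above. A hedged sketch of both call styles (the wrapper function is illustrative):

/* Illustrative callers of the extended amdgpu_vm_bo_update() signature. */
static int example_update_bo_va(struct amdgpu_device *adev,
                                struct amdgpu_bo_va *bo_va)
{
        bool table_freed = false;
        int r;

        /* caller that wants to know whether page tables were freed */
        r = amdgpu_vm_bo_update(adev, bo_va, false, &table_freed);
        if (r)
                return r;

        if (table_freed) {
                /* e.g. invalidate TLBs for this VM before reusing the range */
        }

        /* caller that does not care simply passes NULL */
        return amdgpu_vm_bo_update(adev, bo_va, true, NULL);
}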
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { + if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { amdgpu_vm_bo_evicted(bo_base); continue; } @@ -2630,7 +2761,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(bo_base); - else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) + else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) amdgpu_vm_bo_moved(bo_base); else amdgpu_vm_bo_invalidated(bo_base); @@ -2760,8 +2891,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, - true, true, timeout); + timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true, + true, timeout); if (timeout <= 0) return timeout; @@ -2773,19 +2904,16 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * * @adev: amdgpu_device pointer * @vm: requested vm - * @vm_context: Indicates if it GFX or Compute context - * @pasid: Process address space identifier * * Init @vm fields. * * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int vm_context, u32 pasid) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_bo_param bp; - struct amdgpu_bo *root; + struct amdgpu_bo *root_bo; + struct amdgpu_bo_vm *root; int r, i; vm->va = RB_ROOT_CACHED; @@ -2816,16 +2944,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pte_support_ats = false; vm->is_compute_context = false; - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_COMPUTE); + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_GFX); - if (adev->asic_type == CHIP_RAVEN) - vm->pte_support_ats = true; - } else { - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_GFX); - } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? 
"CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && @@ -2842,53 +2963,38 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, mutex_init(&vm->eviction_lock); vm->evicting = false; - amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) - bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; - r = amdgpu_bo_create(adev, &bp, &root); + r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, + false, &root); if (r) goto error_free_delayed; - - r = amdgpu_bo_reserve(root, true); + root_bo = &root->bo; + r = amdgpu_bo_reserve(root_bo, true); if (r) goto error_free_root; - r = dma_resv_reserve_shared(root->tbo.base.resv, 1); + r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1); if (r) goto error_unreserve; - amdgpu_vm_bo_base_init(&vm->root.base, vm, root); + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); r = amdgpu_vm_clear_bo(adev, vm, root, false); if (r) goto error_unreserve; - amdgpu_bo_unreserve(vm->root.base.bo); - - if (pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, - GFP_ATOMIC); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - if (r < 0) - goto error_free_root; - - vm->pasid = pasid; - } + amdgpu_bo_unreserve(vm->root.bo); INIT_KFIFO(vm->faults); return 0; error_unreserve: - amdgpu_bo_unreserve(vm->root.base.bo); + amdgpu_bo_unreserve(vm->root.bo); error_free_root: - amdgpu_bo_unref(&vm->root.base.bo->shadow); - amdgpu_bo_unref(&vm->root.base.bo); - vm->root.base.bo = NULL; + amdgpu_bo_unref(&root->shadow); + amdgpu_bo_unref(&root_bo); + vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_unlocked); @@ -2914,17 +3020,14 @@ error_free_immediate: * 0 if this VM is clean */ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm) { enum amdgpu_vm_level root = adev->vm_manager.root_level; unsigned int entries = amdgpu_vm_num_entries(adev, root); unsigned int i = 0; - if (!(vm->root.entries)) - return 0; - for (i = 0; i < entries; i++) { - if (vm->root.entries[i].base.bo) + if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) return -EINVAL; } @@ -2936,7 +3039,6 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm - * @pasid: pasid to use * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. @@ -2944,7 +3046,6 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * Changes the following VM parameters: * - use_cpu_for_update * - pte_supports_ats - * - pasid (old PASID is released, because compute manages its own PASIDs) * * Reinitializes the page directory to reflect the changed ATS * setting. @@ -2952,13 +3053,12 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * Returns: * 0 for success, -errno for errors. 
*/ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, - u32 pasid) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; - r = amdgpu_bo_reserve(vm->root.base.bo, true); + r = amdgpu_bo_reserve(vm->root.bo, true); if (r) return r; @@ -2967,27 +3067,16 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto unreserve_bo; - if (pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, - GFP_ATOMIC); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - - if (r == -ENOSPC) - goto unreserve_bo; - r = 0; - } - /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false); + r = amdgpu_vm_clear_bo(adev, vm, + to_amdgpu_bo_vm(vm->root.bo), + false); if (r) - goto free_idr; + goto unreserve_bo; } /* Update VM state */ @@ -3001,10 +3090,10 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (vm->use_cpu_for_update) { /* Sync with last SDMA update/clear before switching to CPU */ - r = amdgpu_bo_sync_wait(vm->root.base.bo, + r = amdgpu_bo_sync_wait(vm->root.bo, AMDGPU_FENCE_OWNER_UNDEFINED, true); if (r) - goto free_idr; + goto unreserve_bo; vm->update_funcs = &amdgpu_vm_cpu_funcs; } else { @@ -3014,38 +3103,13 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = NULL; vm->is_compute_context = true; - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - - /* Free the original amdgpu allocated pasid - * Will be replaced with kfd allocated pasid - */ - amdgpu_pasid_free(vm->pasid); - vm->pasid = 0; - } - /* Free the shadow bo for compute VM */ - amdgpu_bo_unref(&vm->root.base.bo->shadow); - - if (pasid) - vm->pasid = pasid; + amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); goto unreserve_bo; -free_idr: - if (pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - } unreserve_bo: - amdgpu_bo_unreserve(vm->root.base.bo); + amdgpu_bo_unreserve(vm->root.bo); return r; } @@ -3059,14 +3123,7 @@ unreserve_bo: */ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - } - vm->pasid = 0; + amdgpu_vm_set_pasid(adev, vm, 0); vm->is_compute_context = false; } @@ -3088,17 +3145,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - root = amdgpu_bo_ref(vm->root.base.bo); + root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - vm->pasid = 0; 
- } - + amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); @@ -3115,7 +3164,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_pts(adev, vm, NULL); amdgpu_bo_unreserve(root); amdgpu_bo_unref(&root); - WARN_ON(vm->root.base.bo); + WARN_ON(vm->root.bo); drm_sched_entity_destroy(&vm->immediate); drm_sched_entity_destroy(&vm->delayed); @@ -3180,8 +3229,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->vm_manager.vm_update_mode = 0; #endif - idr_init(&adev->vm_manager.pasid_idr); - spin_lock_init(&adev->vm_manager.pasid_lock); + xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ); } /** @@ -3193,8 +3241,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); - idr_destroy(&adev->vm_manager.pasid_idr); + WARN_ON(!xa_empty(&adev->vm_manager.pasids)); + xa_destroy(&adev->vm_manager.pasids); amdgpu_vmid_mgr_fini(adev); } @@ -3232,7 +3280,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) /* Wait vm idle to make sure the vmid set in SPM_VMID is * not referenced anymore. */ - r = amdgpu_bo_reserve(fpriv->vm.root.base.bo, true); + r = amdgpu_bo_reserve(fpriv->vm.root.bo, true); if (r) return r; @@ -3240,7 +3288,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r < 0) return r; - amdgpu_bo_unreserve(fpriv->vm.root.base.bo); + amdgpu_bo_unreserve(fpriv->vm.root.bo); amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); break; default: @@ -3263,13 +3311,13 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, struct amdgpu_vm *vm; unsigned long flags; - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + xa_lock_irqsave(&adev->vm_manager.pasids, flags); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + vm = xa_load(&adev->vm_manager.pasids, pasid); if (vm) *task_info = vm->task_info; - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); } /** @@ -3297,54 +3345,65 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * @adev: amdgpu device pointer * @pasid: PASID of the VM * @addr: Address of the fault + * @write_fault: true is write fault, false is read fault * * Try to gracefully handle a VM fault. Return true if the fault was handled and * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr) + uint64_t addr, bool write_fault) { + bool is_compute_context = false; struct amdgpu_bo *root; + unsigned long irqflags; uint64_t value, flags; struct amdgpu_vm *vm; int r; - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (vm) - root = amdgpu_bo_ref(vm->root.base.bo); - else + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + if (vm) { + root = amdgpu_bo_ref(vm->root.bo); + is_compute_context = vm->is_compute_context; + } else { root = NULL; - spin_unlock(&adev->vm_manager.pasid_lock); + } + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); if (!root) return false; + addr /= AMDGPU_GPU_PAGE_SIZE; + + if (is_compute_context && + !svm_range_restore_pages(adev, pasid, addr, write_fault)) { + amdgpu_bo_unref(&root); + return true; + } + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref; /* Double check that the VM still exists */ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (vm && vm->root.base.bo != root) + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + if (vm && vm->root.bo != root) vm = NULL; - spin_unlock(&adev->vm_manager.pasid_lock); + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); if (!vm) goto error_unlock; - addr /= AMDGPU_GPU_PAGE_SIZE; flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (vm->is_compute_context) { + if (is_compute_context) { /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | AMDGPU_PTE_TF; value = 0; - } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; @@ -3363,7 +3422,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, } r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, - addr, flags, value, NULL, NULL, + addr, flags, value, NULL, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 4e140288159c..85fcfb8c5efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -39,6 +39,7 @@ struct amdgpu_bo_va; struct amdgpu_job; struct amdgpu_bo_list_entry; +struct amdgpu_bo_vm; /* * GPUVM handling @@ -121,9 +122,6 @@ struct amdgpu_bo_list_entry; /* max vmids dedicated for process */ #define AMDGPU_VM_MAX_RESERVED_VMID 1 -#define AMDGPU_VM_CONTEXT_GFX 0 -#define AMDGPU_VM_CONTEXT_COMPUTE 1 - /* See vm_update_mode */ #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) @@ -154,13 +152,6 @@ struct amdgpu_vm_bo_base { bool moved; }; -struct amdgpu_vm_pt { - struct amdgpu_vm_bo_base base; - - /* array of page tables, one for each directory entry */ - struct amdgpu_vm_pt *entries; -}; - /* provided by hw blocks that can write ptes, e.g., sdma */ struct amdgpu_vm_pte_funcs { /* number of dw to reserve per operation */ @@ -234,14 +225,19 @@ struct amdgpu_vm_update_params { * @num_dw_left: number of dw left for the IB */ unsigned int num_dw_left; + + /** + * @table_freed: return true if page table is freed when updating + */ + bool table_freed; }; struct amdgpu_vm_update_funcs { - int (*map_table)(struct amdgpu_bo *bo); + int (*map_table)(struct 
amdgpu_bo_vm *bo); int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv, enum amdgpu_sync_mode sync_mode); int (*update)(struct amdgpu_vm_update_params *p, - struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, + struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); int (*commit)(struct amdgpu_vm_update_params *p, struct dma_fence **fence); @@ -281,7 +277,7 @@ struct amdgpu_vm { struct list_head done; /* contains the page directory */ - struct amdgpu_vm_pt root; + struct amdgpu_vm_bo_base root; struct dma_fence *last_update; /* Scheduler entities for page table updates */ @@ -363,10 +359,11 @@ struct amdgpu_vm_manager { /* PASID to VM mapping, will be used in interrupt context to * look up VM of a page fault */ - struct idr pasid_idr; - spinlock_t pasid_lock; + struct xarray pasids; }; +struct amdgpu_bo_va_mapping; + #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) @@ -377,10 +374,12 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); +int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, + u32 pasid); + long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int vm_context, u32 pasid); -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, @@ -398,9 +397,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev, + struct amdgpu_vm *vm, bool immediate, + bool unlocked, struct dma_resv *resv, + uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, + struct ttm_resource *res, + dma_addr_t *pages_addr, + struct dma_fence **fence, bool *free_table); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear); + bool clear, bool *table_freed); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); @@ -440,13 +448,15 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr); + uint64_t addr, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); +void 
amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, + uint64_t *gtt_mem, uint64_t *cpu_mem); #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index ac45d9c7a4e9..e3fbf0f10add 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -29,9 +29,9 @@ * * @table: newly allocated or validated PD/PT */ -static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table) +static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table) { - return amdgpu_bo_kmap(table, NULL); + return amdgpu_bo_kmap(&table->bo, NULL); } /** @@ -58,7 +58,7 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, * amdgpu_vm_cpu_update - helper to update page tables via CPU * * @p: see amdgpu_vm_update_params definition - * @bo: PD/PT to update + * @vmbo: PD/PT to update * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB * @addr: dst addr to write into pe * @count: number of page entries to update @@ -68,7 +68,7 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, * Write count number of PT/PD entries directly. */ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, - struct amdgpu_bo *bo, uint64_t pe, + struct amdgpu_bo_vm *vmbo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { @@ -76,13 +76,13 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, uint64_t value; int r; - if (bo->tbo.moving) { - r = dma_fence_wait(bo->tbo.moving, true); + if (vmbo->bo.tbo.moving) { + r = dma_fence_wait(vmbo->bo.tbo.moving, true); if (r) return r; } - pe += (unsigned long)amdgpu_bo_kptr(bo); + pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo); trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate); @@ -110,7 +110,7 @@ static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, { /* Flush HDP */ mb(); - amdgpu_asic_flush_hdp(p->adev, NULL); + amdgpu_device_flush_hdp(p->adev, NULL); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index a83a646759c5..dbb551762805 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -33,11 +33,11 @@ * * @table: newly allocated or validated PD/PT */ -static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table) +static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) { int r; - r = amdgpu_ttm_alloc_gart(&table->tbo); + r = amdgpu_ttm_alloc_gart(&table->bo.tbo); if (r) return r; @@ -112,7 +112,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, swap(p->vm->last_unlocked, f); dma_fence_put(tmp); } else { - amdgpu_bo_fence(p->vm->root.base.bo, f, true); + amdgpu_bo_fence(p->vm->root.bo, f, true); } if (fence && !p->immediate) @@ -186,7 +186,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, * amdgpu_vm_sdma_update - execute VM update * * @p: see amdgpu_vm_update_params definition - * @bo: PD/PT to update + * @vmbo: PD/PT to update * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB * @addr: dst addr to write into pe * @count: number of page entries to update @@ -197,10 +197,11 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, * the IB. 
*/ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, - struct amdgpu_bo *bo, uint64_t pe, + struct amdgpu_bo_vm *vmbo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + struct amdgpu_bo *bo = &vmbo->bo; enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED; unsigned int i, ndw, nptes; @@ -238,8 +239,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (!p->pages_addr) { /* set page commands needed */ - if (bo->shadow) - amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr, + if (vmbo->shadow) + amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr, count, incr, flags); amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, incr, flags); @@ -248,7 +249,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, /* copy commands needed */ ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * - (bo->shadow ? 2 : 1); + (vmbo->shadow ? 2 : 1); /* for padding */ ndw -= 7; @@ -263,8 +264,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, pte[i] |= flags; } - if (bo->shadow) - amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes); + if (vmbo->shadow) + amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes); amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); pe += nptes * 8; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index bce105e2973e..7b2b0980ec41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -23,18 +23,27 @@ */ #include <linux/dma-mapping.h> +#include <drm/ttm/ttm_range_manager.h> + #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_res_cursor.h" #include "amdgpu_atomfirmware.h" #include "atom.h" -static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man) +struct amdgpu_vram_reservation { + struct list_head node; + struct drm_mm_node mm_node; +}; + +static inline struct amdgpu_vram_mgr * +to_vram_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_vram_mgr, manager); } -static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr) +static inline struct amdgpu_device * +to_amdgpu_device(struct amdgpu_vram_mgr *mgr) { return container_of(mgr, struct amdgpu_device, mman.vram_mgr); } @@ -82,12 +91,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev, * amount of currently used VRAM in bytes */ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man)); } @@ -100,18 +111,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, * amount of currently used visible VRAM in bytes */ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); 
return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man)); } +/** + * DOC: mem_info_vram_vendor + * + * The amdgpu driver provides a sysfs API for reporting the vendor of the + * installed VRAM + * The file mem_info_vram_vendor is used for this and returns the name of the + * vendor. + */ static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); @@ -153,7 +174,7 @@ static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO, amdgpu_mem_info_vram_vendor, NULL); -static const struct attribute *amdgpu_vram_mgr_attributes[] = { +static struct attribute *amdgpu_vram_mgr_attributes[] = { &dev_attr_mem_info_vram_total.attr, &dev_attr_mem_info_vis_vram_total.attr, &dev_attr_mem_info_vram_used.attr, @@ -162,77 +183,9 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; -static const struct ttm_resource_manager_func amdgpu_vram_mgr_func; - -/** - * amdgpu_vram_mgr_init - init VRAM manager and DRM MM - * - * @adev: amdgpu_device pointer - * - * Allocate and initialize the VRAM manager. - */ -int amdgpu_vram_mgr_init(struct amdgpu_device *adev) -{ - struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - - ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT); - - man->func = &amdgpu_vram_mgr_func; - - drm_mm_init(&mgr->mm, 0, man->size); - spin_lock_init(&mgr->lock); - INIT_LIST_HEAD(&mgr->reservations_pending); - INIT_LIST_HEAD(&mgr->reserved_pages); - - /* Add the two VRAM-related sysfs files */ - ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); - if (ret) - DRM_ERROR("Failed to register sysfs\n"); - - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); - ttm_resource_manager_set_used(man, true); - return 0; -} - -/** - * amdgpu_vram_mgr_fini - free and destroy VRAM manager - * - * @adev: amdgpu_device pointer - * - * Destroy and free the VRAM manager, returns -EBUSY if ranges are still - * allocated inside it. 
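The mem_info_* attributes listed above are plain sysfs files that emit a single byte count, so the counters the VRAM manager keeps in mgr->usage and mgr->vis_usage can be read without any driver-specific tooling. A small userspace reader as an illustration; the card0 path is an assumption and depends on which DRM node amdgpu bound to:

#include <stdio.h>

int main(void)
{
        unsigned long long used = 0, total = 0;
        FILE *f;

        /* hypothetical device path; adjust card0 for the GPU of interest */
        f = fopen("/sys/class/drm/card0/device/mem_info_vram_used", "r");
        if (f) {
                if (fscanf(f, "%llu", &used) != 1)
                        used = 0;
                fclose(f);
        }
        f = fopen("/sys/class/drm/card0/device/mem_info_vram_total", "r");
        if (f) {
                if (fscanf(f, "%llu", &total) != 1)
                        total = 0;
                fclose(f);
        }

        printf("VRAM: %llu of %llu bytes used\n", used, total);
        return 0;
}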
- */ -void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) -{ - struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - struct amdgpu_vram_reservation *rsv, *temp; - - ttm_resource_manager_set_used(man, false); - - ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); - if (ret) - return; - - spin_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) - kfree(rsv); - - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { - drm_mm_remove_node(&rsv->mm_node); - kfree(rsv); - } - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); - - sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); - - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); -} +const struct attribute_group amdgpu_vram_mgr_attr_group = { + .attrs = amdgpu_vram_mgr_attributes +}; /** * amdgpu_vram_mgr_vis_size - Calculate visible node size @@ -266,23 +219,25 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct ttm_resource *mem = &bo->tbo.mem; - struct drm_mm_node *nodes = mem->mm_node; - unsigned pages = mem->num_pages; + struct ttm_resource *res = bo->tbo.resource; + unsigned pages = res->num_pages; + struct drm_mm_node *mm; u64 usage; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); - if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) + if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - for (usage = 0; nodes && pages; pages -= nodes->size, nodes++) - usage += amdgpu_vram_mgr_vis_size(adev, nodes); + mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; + for (usage = 0; pages; pages -= mm->size, mm++) + usage += amdgpu_vram_mgr_vis_size(adev, mm); return usage; } +/* Commit the reservation of VRAM pages */ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); @@ -406,22 +361,22 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, * @man: TTM memory type manager * @tbo: TTM BO we need this range for * @place: placement flags and restrictions - * @mem: the resulting mem object + * @res: the resulting mem object * * Allocate VRAM for the given BO. 
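amdgpu_vram_mgr_bo_visible_size() above now walks the drm_mm nodes embedded in the ttm_range_mgr_node and counts only the part of each node that lies below visible_vram_size. A standalone, page-based model of that accounting (the node layout and the limit are made up for the demo):

#include <stdio.h>

struct mm_node {                        /* like drm_mm_node: start/size in pages */
        unsigned long start, size;
};

static unsigned long vis_pages(const struct mm_node *n, unsigned long vis_limit)
{
        unsigned long end = n->start + n->size;

        if (n->start >= vis_limit)
                return 0;               /* node entirely above the visible window */
        return (end > vis_limit ? vis_limit : end) - n->start;
}

int main(void)
{
        const unsigned long visible_vram_pages = 0x10000;    /* 256 MiB of 4 KiB pages */
        const struct mm_node nodes[] = {
                { 0x0f000, 0x800 },     /* fully visible   */
                { 0x0fc00, 0x800 },     /* straddles limit */
                { 0x20000, 0x800 },     /* not visible     */
        };
        unsigned long usage = 0;
        unsigned int i;

        for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
                usage += vis_pages(&nodes[i], visible_vram_pages);

        printf("%#lx visible pages\n", usage);  /* 0x800 + 0x400 + 0 = 0xc00 */
        return 0;
}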
*/ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, struct ttm_buffer_object *tbo, const struct ttm_place *place, - struct ttm_resource *mem) + struct ttm_resource **res) { + unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + uint64_t vis_usage = 0, mem_bytes, max_bytes; + struct ttm_range_mgr_node *node; struct drm_mm *mm = &mgr->mm; - struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; - unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t vis_usage = 0, mem_bytes, max_bytes; unsigned i; int r; @@ -434,10 +389,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, max_bytes -= AMDGPU_VM_RESERVED_VRAM; /* bail out quickly if there's likely not enough VRAM for this BO */ - mem_bytes = (u64)mem->num_pages << PAGE_SHIFT; + mem_bytes = tbo->base.size; if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) { - atomic64_sub(mem_bytes, &mgr->usage); - return -ENOSPC; + r = -ENOSPC; + goto error_sub; } if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { @@ -448,78 +403,83 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, pages_per_node = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_node = (2UL << (20UL - PAGE_SHIFT)); + pages_per_node = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment); - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); + pages_per_node = max_t(uint32_t, pages_per_node, + tbo->page_alignment); + num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node); } - nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes), - GFP_KERNEL | __GFP_ZERO); - if (!nodes) { - atomic64_sub(mem_bytes, &mgr->usage); - return -ENOMEM; + node = kvmalloc(struct_size(node, mm_nodes, num_nodes), + GFP_KERNEL | __GFP_ZERO); + if (!node) { + r = -ENOMEM; + goto error_sub; } + ttm_resource_init(tbo, place, &node->base); + mode = DRM_MM_INSERT_BEST; if (place->flags & TTM_PL_FLAG_TOPDOWN) mode = DRM_MM_INSERT_HIGH; - mem->start = 0; - pages_left = mem->num_pages; + pages_left = node->base.num_pages; - spin_lock(&mgr->lock); - for (i = 0; pages_left >= pages_per_node; ++i) { - unsigned long pages = rounddown_pow_of_two(pages_left); - - /* Limit maximum size to 2GB due to SG table limitations */ - pages = min(pages, (2UL << (30 - PAGE_SHIFT))); - - r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, - pages_per_node, 0, - place->fpfn, lpfn, - mode); - if (unlikely(r)) - break; - - vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); - amdgpu_vram_mgr_virt_start(mem, &nodes[i]); - pages_left -= pages; - } + /* Limit maximum size to 2GB due to SG table limitations */ + pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); - for (; pages_left; ++i) { - unsigned long pages = min(pages_left, pages_per_node); - uint32_t alignment = mem->page_alignment; + i = 0; + spin_lock(&mgr->lock); + while (pages_left) { + uint32_t alignment = tbo->page_alignment; - if (pages == pages_per_node) + if (pages >= pages_per_node) alignment = pages_per_node; - r = drm_mm_insert_node_in_range(mm, &nodes[i], - pages, alignment, 0, - place->fpfn, lpfn, - mode); - if (unlikely(r)) - goto error; + r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages, + alignment, 0, place->fpfn, + lpfn, mode); + if (unlikely(r)) { + if (pages > pages_per_node) { + if (is_power_of_2(pages)) + pages = pages / 2; + else + pages = rounddown_pow_of_two(pages); + continue; + } + goto 
error_free; + } - vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); - amdgpu_vram_mgr_virt_start(mem, &nodes[i]); + vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]); + amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]); pages_left -= pages; + ++i; + + if (pages > pages_left) + pages = pages_left; } spin_unlock(&mgr->lock); - atomic64_add(vis_usage, &mgr->vis_usage); + if (i == 1) + node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; - mem->mm_node = nodes; + if (adev->gmc.xgmi.connected_to_cpu) + node->base.bus.caching = ttm_cached; + else + node->base.bus.caching = ttm_write_combined; + atomic64_add(vis_usage, &mgr->vis_usage); + *res = &node->base; return 0; -error: +error_free: while (i--) - drm_mm_remove_node(&nodes[i]); + drm_mm_remove_node(&node->mm_nodes[i]); spin_unlock(&mgr->lock); - atomic64_sub(mem->num_pages << PAGE_SHIFT, &mgr->usage); + kvfree(node); - kvfree(nodes); +error_sub: + atomic64_sub(mem_bytes, &mgr->usage); return r; } @@ -527,29 +487,27 @@ error: * amdgpu_vram_mgr_del - free ranges * * @man: TTM memory type manager - * @mem: TTM memory object + * @res: TTM memory object * * Free the allocated VRAM again. */ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, - struct ttm_resource *mem) + struct ttm_resource *res) { + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_mm_node *nodes = mem->mm_node; uint64_t usage = 0, vis_usage = 0; - unsigned pages = mem->num_pages; - - if (!mem->mm_node) - return; + unsigned i, pages; spin_lock(&mgr->lock); - while (pages) { - pages -= nodes->size; - drm_mm_remove_node(nodes); - usage += nodes->size << PAGE_SHIFT; - vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); - ++nodes; + for (i = 0, pages = res->num_pages; pages; + pages -= node->mm_nodes[i].size, ++i) { + struct drm_mm_node *mm = &node->mm_nodes[i]; + + drm_mm_remove_node(mm); + usage += mm->size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, mm); } amdgpu_vram_mgr_do_reserve(man); spin_unlock(&mgr->lock); @@ -557,15 +515,14 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, atomic64_sub(usage, &mgr->usage); atomic64_sub(vis_usage, &mgr->vis_usage); - kvfree(mem->mm_node); - mem->mm_node = NULL; + kvfree(node); } /** * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table * * @adev: amdgpu device pointer - * @mem: TTM memory object + * @res: TTM memory object * @offset: byte offset from the base of VRAM BO * @length: number of bytes to export in sg_table * @dev: the other device @@ -575,7 +532,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, * Allocate and fill a sg table from a VRAM allocation. */ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, - struct ttm_resource *mem, + struct ttm_resource *res, u64 offset, u64 length, struct device *dev, enum dma_data_direction dir, @@ -591,7 +548,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, return -ENOMEM; /* Determine the number of DRM_MM nodes to export */ - amdgpu_res_first(mem, offset, length, &cursor); + amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; amdgpu_res_next(&cursor, cursor.size); @@ -611,7 +568,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, * and the number of bytes from it. 
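The rewritten amdgpu_vram_mgr_new() above replaces the fixed per-chunk loop with a retry scheme: it tries the largest chunk first (capped at 2 GiB because of SG table limits), and whenever drm_mm_insert_node_in_range() fails it halves the chunk (or rounds it down to a power of two) and retries, giving up only once the 2 MiB pages_per_node floor cannot be placed either. A self-contained toy model of that policy; try_place() and its fragmentation limit are invented purely for the demonstration:

#include <stdbool.h>
#include <stdio.h>

#define PAGES_PER_NODE  512UL           /* 2 MiB of 4 KiB pages, as in the diff */
#define MAX_CHUNK       (2UL << 18)     /* 2 GiB of 4 KiB pages: 2UL << (30 - 12) */

static unsigned long rounddown_pow_of_two(unsigned long v)
{
        unsigned long r = 1;

        while (r * 2 <= v)
                r *= 2;
        return r;
}

/* stand-in for drm_mm_insert_node_in_range(): pretend the address space
 * is fragmented and nothing larger than 16384 pages fits contiguously */
static bool try_place(unsigned long pages)
{
        return pages <= 16384;
}

int main(void)
{
        unsigned long pages_left = 1UL << 16;   /* a 256 MiB request in 4 KiB pages */
        unsigned long pages = pages_left < MAX_CHUNK ? pages_left : MAX_CHUNK;

        while (pages_left) {
                if (!try_place(pages)) {
                        if (pages > PAGES_PER_NODE) {
                                /* halve if already a power of two, otherwise
                                 * round down to one, then retry */
                                pages = (pages & (pages - 1)) ?
                                        rounddown_pow_of_two(pages) : pages / 2;
                                continue;
                        }
                        fprintf(stderr, "allocation failed\n");
                        return 1;
                }
                printf("placed a %lu page chunk\n", pages);
                pages_left -= pages;
                if (pages > pages_left)
                        pages = pages_left;
        }
        return 0;
}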
Access the following * DRM_MM node(s) if more buffer needs to exported */ - amdgpu_res_first(mem, offset, length, &cursor); + amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { phys_addr_t phys = cursor.start + adev->gmc.aper_base; size_t size = cursor.size; @@ -727,3 +684,65 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { .free = amdgpu_vram_mgr_del, .debug = amdgpu_vram_mgr_debug }; + +/** + * amdgpu_vram_mgr_init - init VRAM manager and DRM MM + * + * @adev: amdgpu_device pointer + * + * Allocate and initialize the VRAM manager. + */ +int amdgpu_vram_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; + + ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT); + + man->func = &amdgpu_vram_mgr_func; + + drm_mm_init(&mgr->mm, 0, man->size); + spin_lock_init(&mgr->lock); + INIT_LIST_HEAD(&mgr->reservations_pending); + INIT_LIST_HEAD(&mgr->reserved_pages); + + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_vram_mgr_fini - free and destroy VRAM manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the VRAM manager, returns -EBUSY if ranges are still + * allocated inside it. + */ +void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + struct amdgpu_vram_reservation *rsv, *temp; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + kfree(rsv); + + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); + kfree(rsv); + } + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 8567d5d77346..978ac927ac11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -32,6 +32,10 @@ #include "wafl/wafl2_4_0_0_smn.h" #include "wafl/wafl2_4_0_0_sh_mask.h" +#define smnPCS_XGMI23_PCS_ERROR_STATUS 0x11a01210 +#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c +#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210 + static DEFINE_MUTEX(xgmi_mutex); #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4 @@ -63,6 +67,33 @@ static const int wafl_pcs_err_status_reg_arct[] = { smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000, }; +static const int xgmi23_pcs_err_status_reg_aldebaran[] = { + smnPCS_XGMI23_PCS_ERROR_STATUS, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x100000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x200000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x300000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x400000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x500000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x600000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x700000 +}; + +static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = { + smnPCS_XGMI3X16_PCS_ERROR_STATUS, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x200000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x300000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x400000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 
0x500000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x600000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x700000 +}; + +static const int walf_pcs_err_status_reg_aldebaran[] = { + smnPCS_GOPX1_PCS_ERROR_STATUS, + smnPCS_GOPX1_PCS_ERROR_STATUS + 0x100000 +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -486,6 +517,44 @@ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, return -EINVAL; } +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) + return top->nodes[i].num_links; + return -EINVAL; +} + +/* + * Devices that support extended data require the entire hive to initialize with + * the shared memory buffer flag set. + * + * Hive locks and conditions apply - see amdgpu_xgmi_add_device + */ +static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_info *hive, + bool set_extended_data) +{ + struct amdgpu_device *tmp_adev; + int ret; + + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_initialize(&tmp_adev->psp, set_extended_data, false); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Failed to initialize xgmi session for data partition %i\n", + set_extended_data); + return ret; + } + + } + + return 0; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { struct psp_xgmi_topology_info *top_info; @@ -500,7 +569,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) if (!adev->gmc.xgmi.pending_reset && amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { - ret = psp_xgmi_initialize(&adev->psp); + ret = psp_xgmi_initialize(&adev->psp, false, true); if (ret) { dev_err(adev->dev, "XGMI: Failed to initialize xgmi session\n"); @@ -563,7 +632,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) /* get latest topology info for each device from psp */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info); + &tmp_adev->psp.xgmi_context.top_info, false); if (ret) { dev_err(tmp_adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", @@ -573,6 +642,34 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit_unlock; } } + + /* get topology again for hives that support extended data */ + if (adev->psp.xgmi_context.supports_extended_data) { + + /* initialize the hive to get extended data. */ + ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, true); + if (ret) + goto exit_unlock; + + /* get the extended data. */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info, true); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + goto exit_unlock; + } + } + + /* initialize the hive to get non-extended data for the next round. 
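The three Aldebaran PCS error-status tables added earlier in this file share one layout: a single SMN base address per link type, with the remaining instances spaced 0x100000 apart. A standalone check of that spacing against the xgmi3x16 table, using the base address from the new #define:

#include <stdio.h>

#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c     /* base address from the diff */

int main(void)
{
        unsigned int i;

        /* eight instances, matching the eight entries of
         * xgmi3x16_pcs_err_status_reg_aldebaran[] above */
        for (i = 0; i < 8; i++)
                printf("xgmi3x16 instance %u: 0x%08x\n", i,
                       smnPCS_XGMI3X16_PCS_ERROR_STATUS + i * 0x100000);
        return 0;
}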
*/ + ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, false); + if (ret) + goto exit_unlock; + + } } if (!ret && !adev->gmc.xgmi.pending_reset) @@ -651,7 +748,6 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->gmc.xgmi.ras_if->sub_block_index = 0; - strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); } ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, @@ -706,6 +802,17 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) pcs_clear_status(adev, xgmi_pcs_err_status_reg_vg20[i]); break; + case CHIP_ALDEBARAN: + for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) + pcs_clear_status(adev, + xgmi23_pcs_err_status_reg_aldebaran[i]); + for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) + pcs_clear_status(adev, + xgmi23_pcs_err_status_reg_aldebaran[i]); + for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) + pcs_clear_status(adev, + walf_pcs_err_status_reg_aldebaran[i]); + break; default: break; } @@ -783,7 +890,6 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } break; case CHIP_VEGA20: - default: /* check xgmi pcs error */ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) { data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i]); @@ -799,6 +905,32 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, data, &ue_cnt, &ce_cnt, false); } break; + case CHIP_ALDEBARAN: + /* check xgmi23 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) { + data = RREG32_PCIE(xgmi23_pcs_err_status_reg_aldebaran[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, + data, &ue_cnt, &ce_cnt, true); + } + /* check xgmi3x16 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) { + data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, + data, &ue_cnt, &ce_cnt, true); + } + /* check wafl pcs error */ + for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) { + data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, + data, &ue_cnt, &ce_cnt, false); + } + break; + default: + dev_warn(adev->dev, "XGMI RAS error query not supported"); + break; } adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 12969c0830d5..d2189bf7d428 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -59,6 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 1a8f6d4baab2..a434c71fde8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -56,6 +56,8 @@ #define 
AMD_SRIOV_MSG_RESERVE_UCODE 24 +#define AMD_SRIOV_MSG_RESERVE_VCN_INST 4 + enum amd_sriov_ucode_engine_id { AMD_SRIOV_UCODE_ID_VCE = 0, AMD_SRIOV_UCODE_ID_UVD, @@ -98,10 +100,10 @@ union amd_sriov_msg_feature_flags { union amd_sriov_reg_access_flags { struct { - uint32_t vf_reg_access_ih : 1; + uint32_t vf_reg_access_ih : 1; uint32_t vf_reg_access_mmhub : 1; - uint32_t vf_reg_access_gc : 1; - uint32_t reserved : 29; + uint32_t vf_reg_access_gc : 1; + uint32_t reserved : 29; } flags; uint32_t all; }; @@ -114,6 +116,37 @@ union amd_sriov_msg_os_info { uint32_t all; }; +struct amd_sriov_msg_uuid_info { + union { + struct { + uint32_t did : 16; + uint32_t fcn : 8; + uint32_t asic_7 : 8; + }; + uint32_t time_low; + }; + + struct { + uint32_t time_mid : 16; + uint32_t time_high : 12; + uint32_t version : 4; + }; + + struct { + struct { + uint8_t clk_seq_hi : 6; + uint8_t variant : 2; + }; + union { + uint8_t clk_seq_low; + uint8_t asic_6; + }; + uint16_t asic_4; + }; + + uint32_t asic_0; +}; + struct amd_sriov_msg_pf2vf_info_header { /* the total structure size in byte */ uint32_t size; @@ -160,10 +193,19 @@ struct amd_sriov_msg_pf2vf_info { /* identification in ROCm SMI */ uint64_t uuid; uint32_t fcn_idx; - /* flags which indicate the register access method VF should use */ + /* flags to indicate which register access method VF should use */ union amd_sriov_reg_access_flags reg_access_flags; + /* MM BW management */ + struct { + uint32_t decode_max_dimension_pixels; + uint32_t decode_max_frame_pixels; + uint32_t encode_max_dimension_pixels; + uint32_t encode_max_frame_pixels; + } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST]; + /* UUID info */ + struct amd_sriov_msg_uuid_info uuid_info; /* reserved */ - uint32_t reserved[256-27]; + uint32_t reserved[256 - 47]; }; struct amd_sriov_msg_vf2pf_info_header { diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index 5b90efd6f6d0..3ac505d954c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -36,9 +36,12 @@ athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + return; + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + if (enable) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; else data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -53,10 +56,13 @@ athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; + if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))) + return; + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && - (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + if (enable) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; else data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index 2ac4988ea0ff..c12c2900732b 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -74,6 +74,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); athub_v2_1_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE); break; diff --git 
a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 3dcb8b32f48b..6fa2229b7229 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -31,6 +31,7 @@ #define ATOM_DEBUG +#include "atomfirmware.h" #include "atom.h" #include "atom-names.h" #include "atom-bits.h" @@ -1299,12 +1300,168 @@ static void atom_index_iio(struct atom_context *ctx, int base) } } +static void atom_get_vbios_name(struct atom_context *ctx) +{ + unsigned char *p_rom; + unsigned char str_num; + unsigned short off_to_vbios_str; + unsigned char *c_ptr; + int name_size; + int i; + + const char *na = "--N/A--"; + char *back; + + p_rom = ctx->bios; + + str_num = *(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS); + if (str_num != 0) { + off_to_vbios_str = + *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START); + + c_ptr = (unsigned char *)(p_rom + off_to_vbios_str); + } else { + /* do not know where to find name */ + memcpy(ctx->name, na, 7); + ctx->name[7] = 0; + return; + } + + /* + * skip the atombios strings, usually 4 + * 1st is P/N, 2nd is ASIC, 3rd is PCI type, 4th is Memory type + */ + for (i = 0; i < str_num; i++) { + while (*c_ptr != 0) + c_ptr++; + c_ptr++; + } + + /* skip the following 2 chars: 0x0D 0x0A */ + c_ptr += 2; + + name_size = strnlen(c_ptr, STRLEN_LONG - 1); + memcpy(ctx->name, c_ptr, name_size); + back = ctx->name + name_size; + while ((*--back) == ' ') + ; + *(back + 1) = '\0'; +} + +static void atom_get_vbios_date(struct atom_context *ctx) +{ + unsigned char *p_rom; + unsigned char *date_in_rom; + + p_rom = ctx->bios; + + date_in_rom = p_rom + OFFSET_TO_VBIOS_DATE; + + ctx->date[0] = '2'; + ctx->date[1] = '0'; + ctx->date[2] = date_in_rom[6]; + ctx->date[3] = date_in_rom[7]; + ctx->date[4] = '/'; + ctx->date[5] = date_in_rom[0]; + ctx->date[6] = date_in_rom[1]; + ctx->date[7] = '/'; + ctx->date[8] = date_in_rom[3]; + ctx->date[9] = date_in_rom[4]; + ctx->date[10] = ' '; + ctx->date[11] = date_in_rom[9]; + ctx->date[12] = date_in_rom[10]; + ctx->date[13] = date_in_rom[11]; + ctx->date[14] = date_in_rom[12]; + ctx->date[15] = date_in_rom[13]; + ctx->date[16] = '\0'; +} + +static unsigned char *atom_find_str_in_rom(struct atom_context *ctx, char *str, int start, + int end, int maxlen) +{ + unsigned long str_off; + unsigned char *p_rom; + unsigned short str_len; + + str_off = 0; + str_len = strnlen(str, maxlen); + p_rom = ctx->bios; + + for (; start <= end; ++start) { + for (str_off = 0; str_off < str_len; ++str_off) { + if (str[str_off] != *(p_rom + start + str_off)) + break; + } + + if (str_off == str_len || str[str_off] == 0) + return p_rom + start; + } + return NULL; +} + +static void atom_get_vbios_pn(struct atom_context *ctx) +{ + unsigned char *p_rom; + unsigned short off_to_vbios_str; + unsigned char *vbios_str; + int count; + + off_to_vbios_str = 0; + p_rom = ctx->bios; + + if (*(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS) != 0) { + off_to_vbios_str = + *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START); + + vbios_str = (unsigned char *)(p_rom + off_to_vbios_str); + } else { + vbios_str = p_rom + OFFSET_TO_VBIOS_PART_NUMBER; + } + + if (*vbios_str == 0) { + vbios_str = atom_find_str_in_rom(ctx, BIOS_ATOM_PREFIX, 3, 1024, 64); + if (vbios_str == NULL) + vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1; + } + if (vbios_str != NULL && *vbios_str == 0) + vbios_str++; + + if (vbios_str != NULL) { + count = 0; + while ((count < BIOS_STRING_LENGTH) && vbios_str[count] >= ' ' && + vbios_str[count] <= 'z') { + 
ctx->vbios_pn[count] = vbios_str[count]; + count++; + } + + ctx->vbios_pn[count] = 0; + } +} + +static void atom_get_vbios_version(struct atom_context *ctx) +{ + unsigned char *vbios_ver; + + /* find anchor ATOMBIOSBK-AMD */ + vbios_ver = atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, 3, 1024, 64); + if (vbios_ver != NULL) { + /* skip ATOMBIOSBK-AMD VER */ + vbios_ver += 18; + memcpy(ctx->vbios_ver_str, vbios_ver, STRLEN_NORMAL); + } else { + ctx->vbios_ver_str[0] = '\0'; + } +} + struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) { int base; struct atom_context *ctx = kzalloc(sizeof(struct atom_context), GFP_KERNEL); char *str; + struct _ATOM_ROM_HEADER *atom_rom_header; + struct _ATOM_MASTER_DATA_TABLE *master_table; + struct _ATOM_FIRMWARE_INFO *atom_fw_info; u16 idx; if (!ctx) @@ -1353,6 +1510,21 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) strlcpy(ctx->vbios_version, str, sizeof(ctx->vbios_version)); } + atom_rom_header = (struct _ATOM_ROM_HEADER *)CSTR(base); + if (atom_rom_header->usMasterDataTableOffset != 0) { + master_table = (struct _ATOM_MASTER_DATA_TABLE *) + CSTR(atom_rom_header->usMasterDataTableOffset); + if (master_table->ListOfDataTables.FirmwareInfo != 0) { + atom_fw_info = (struct _ATOM_FIRMWARE_INFO *) + CSTR(master_table->ListOfDataTables.FirmwareInfo); + ctx->version = atom_fw_info->ulFirmwareRevision; + } + } + + atom_get_vbios_name(ctx); + atom_get_vbios_pn(ctx); + atom_get_vbios_date(ctx); + atom_get_vbios_version(ctx); return ctx; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index d279759cab47..0c1839824520 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -112,6 +112,10 @@ struct drm_device; #define ATOM_IO_SYSIO 2 #define ATOM_IO_IIO 0x80 +#define STRLEN_NORMAL 32 +#define STRLEN_LONG 64 +#define STRLEN_VERYLONG 254 + struct card_info { struct drm_device *dev; void (* reg_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */ @@ -140,6 +144,12 @@ struct atom_context { uint32_t *scratch; int scratch_size_bytes; char vbios_version[20]; + + uint8_t name[STRLEN_LONG]; + uint8_t vbios_pn[STRLEN_LONG]; + uint32_t version; + uint8_t vbios_ver_str[STRLEN_NORMAL]; + uint8_t date[STRLEN_NORMAL]; }; extern int amdgpu_atom_debug; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 159a2a4385a1..afad094f84c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -851,7 +851,7 @@ void amdgpu_atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode pll->reference_div = amdgpu_crtc->pll_reference_div; pll->post_div = amdgpu_crtc->pll_post_div; - amdgpu_pll_compute(pll, amdgpu_crtc->adjusted_clock, &pll_clock, + amdgpu_pll_compute(adev, pll, amdgpu_crtc->adjusted_clock, &pll_clock, &fb_div, &frac_fb_div, &ref_div, &post_div); amdgpu_atombios_crtc_program_ss(adev, ATOM_DISABLE, amdgpu_crtc->pll_id, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index a3ba9ca11e98..f327becb022f 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -188,6 +188,8 @@ void amdgpu_atombios_dp_aux_init(struct amdgpu_connector *amdgpu_connector) { amdgpu_connector->ddc_bus->rec.hpd = amdgpu_connector->hpd.hpd; amdgpu_connector->ddc_bus->aux.transfer = amdgpu_atombios_dp_aux_transfer; + amdgpu_connector->ddc_bus->aux.drm_dev = 
amdgpu_connector->base.dev; + drm_dp_aux_init(&amdgpu_connector->ddc_bus->aux); amdgpu_connector->ddc_bus->has_aux = true; } @@ -610,7 +612,7 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i dp_info->tries = 0; voltage = 0xff; while (1) { - drm_dp_link_train_clock_recovery_delay(dp_info->dpcd); + drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd); if (drm_dp_dpcd_read_link_status(dp_info->aux, dp_info->link_status) <= 0) { @@ -675,7 +677,7 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i dp_info->tries = 0; channel_eq = false; while (1) { - drm_dp_link_train_channel_eq_delay(dp_info->dpcd); + drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd); if (drm_dp_dpcd_read_link_status(dp_info->aux, dp_info->link_status) <= 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c b/drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c new file mode 100644 index 000000000000..608a113ce354 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c @@ -0,0 +1,54 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "beige_goby_ip_offset.h" + +int beige_goby_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialize the block needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index c0fcc41ee574..54f28c075f21 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -70,7 +70,7 @@ #include "amdgpu_dm.h" #include "amdgpu_amdkfd.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" static const struct amdgpu_video_codec_info cik_video_codecs_encode_array[] = { @@ -2259,7 +2259,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2277,7 +2277,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2295,7 +2295,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2315,7 +2315,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if 
(adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index d3745711d55f..df385ffc9768 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -309,8 +309,7 @@ static int cik_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c4bb8eed246d..c8ebd108548d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -720,7 +720,7 @@ err0: } /** - * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART + * cik_sdma_vm_copy_pte - update PTEs by copying them from the GART * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry @@ -746,7 +746,7 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib, } /** - * cik_sdma_vm_write_pages - update PTEs by writing them manually + * cik_sdma_vm_write_pte - update PTEs by writing them manually * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry @@ -775,7 +775,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, } /** - * cik_sdma_vm_set_pages - update the page tables using sDMA + * cik_sdma_vm_set_pte_pde - update the page tables using sDMA * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry @@ -804,7 +804,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, } /** - * cik_sdma_vm_pad_ib - pad the IB to the required number of dw + * cik_sdma_ring_pad_ib - pad the IB to the required number of dw * * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c new file mode 100644 index 000000000000..58808814d8fb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -0,0 +1,51 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "cyan_skillfish_ip_offset.h" + +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 307c01301c87..b8c47e0cf37a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -301,8 +301,7 @@ static int cz_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index dbcb09cf83e6..c7803dc2b2d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -456,7 +456,7 @@ static void dce_v6_0_program_fmt(struct drm_encoder *encoder) } /** - * cik_get_number_of_dram_channels - get the number of dram channels + * si_get_number_of_dram_channels - get the number of dram channels * * @adev: amdgpu_device pointer * diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c deleted file mode 100644 index 5c11144da051..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ /dev/null @@ -1,782 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
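The two new reg-base init files above (beige_goby_reg_init.c and cyan_skillfish_reg_init.c) follow the driver's usual pattern: for every hardware IP, adev->reg_offset[HWIP][instance] is pointed at a per-instance table of segment base addresses, and register accessors later add an offset within a segment. A much-simplified standalone model of that lookup; the structure layout and the numbers are placeholders, not the generated ip_offset headers:

#include <stdint.h>
#include <stdio.h>

#define MAX_INSTANCE 2
#define MAX_SEGMENT  3

/* stand-in for the per-ASIC tables in <asic>_ip_offset.h */
struct ip_base {
        struct { uint32_t segment[MAX_SEGMENT]; } instance[MAX_INSTANCE];
};

static const struct ip_base GC_BASE = {
        .instance[0].segment = { 0x1000, 0x8000, 0x28000 },    /* placeholder values */
};

/* stand-in for adev->reg_offset[GC_HWIP][instance] */
static const uint32_t *reg_offset_gc[MAX_INSTANCE];

int main(void)
{
        unsigned int i;

        for (i = 0; i < MAX_INSTANCE; i++)
                reg_offset_gc[i] = GC_BASE.instance[i].segment;

        /* a register address is its segment base plus the offset inside it */
        printf("instance 0, segment 1, reg 0x10 -> 0x%08x\n",
               reg_offset_gc[0][1] + 0x10);
        return 0;
}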
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include <drm/drm_vblank.h> - -#include "amdgpu.h" -#include "amdgpu_pm.h" -#include "amdgpu_i2c.h" -#include "atom.h" -#include "amdgpu_pll.h" -#include "amdgpu_connectors.h" -#ifdef CONFIG_DRM_AMDGPU_SI -#include "dce_v6_0.h" -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK -#include "dce_v8_0.h" -#endif -#include "dce_v10_0.h" -#include "dce_v11_0.h" -#include "dce_virtual.h" -#include "ivsrcid/ivsrcid_vislands30.h" -#include "amdgpu_display.h" - -#define DCE_VIRTUAL_VBLANK_PERIOD 16666666 - - -static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); -static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); -static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, - int index); -static int dce_virtual_pageflip(struct amdgpu_device *adev, - unsigned crtc_id); -static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer); -static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state); - -static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc) -{ - return 0; -} - -static void dce_virtual_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base, bool async) -{ - return; -} - -static int dce_virtual_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, - u32 *vbl, u32 *position) -{ - *vbl = 0; - *position = 0; - - return -EINVAL; -} - -static bool dce_virtual_hpd_sense(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - return true; -} - -static void dce_virtual_hpd_set_polarity(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - return; -} - -static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev) -{ - return 0; -} - -/** - * dce_virtual_bandwidth_update - program display watermarks - * - * @adev: amdgpu_device pointer - * - * Calculate and program the display watermarks and line - * buffer allocation (CIK). 
- */ -static void dce_virtual_bandwidth_update(struct amdgpu_device *adev) -{ - return; -} - -static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, - u16 *green, u16 *blue, uint32_t size, - struct drm_modeset_acquire_ctx *ctx) -{ - return 0; -} - -static void dce_virtual_crtc_destroy(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - drm_crtc_cleanup(crtc); - kfree(amdgpu_crtc); -} - -static const struct drm_crtc_funcs dce_virtual_crtc_funcs = { - .cursor_set2 = NULL, - .cursor_move = NULL, - .gamma_set = dce_virtual_crtc_gamma_set, - .set_config = amdgpu_display_crtc_set_config, - .destroy = dce_virtual_crtc_destroy, - .page_flip_target = amdgpu_display_crtc_page_flip_target, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, - .enable_vblank = amdgpu_enable_vblank_kms, - .disable_vblank = amdgpu_disable_vblank_kms, - .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -}; - -static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - unsigned type; - - switch (mode) { - case DRM_MODE_DPMS_ON: - amdgpu_crtc->enabled = true; - /* Make sure VBLANK interrupts are still enabled */ - type = amdgpu_display_crtc_idx_to_irq_type(adev, - amdgpu_crtc->crtc_id); - amdgpu_irq_update(adev, &adev->crtc_irq, type); - drm_crtc_vblank_on(crtc); - break; - case DRM_MODE_DPMS_STANDBY: - case DRM_MODE_DPMS_SUSPEND: - case DRM_MODE_DPMS_OFF: - drm_crtc_vblank_off(crtc); - amdgpu_crtc->enabled = false; - break; - } -} - - -static void dce_virtual_crtc_prepare(struct drm_crtc *crtc) -{ - dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); -} - -static void dce_virtual_crtc_commit(struct drm_crtc *crtc) -{ - dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_ON); -} - -static void dce_virtual_crtc_disable(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - - if (dev->num_crtcs) - drm_crtc_vblank_off(crtc); - - amdgpu_crtc->enabled = false; - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; -} - -static int dce_virtual_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, struct drm_framebuffer *old_fb) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - /* update the hw version fpr dpm */ - amdgpu_crtc->hw_mode = *adjusted_mode; - - return 0; -} - -static bool dce_virtual_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - - -static int dce_virtual_crtc_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) -{ - return 0; -} - -static int dce_virtual_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return 0; -} - -static const struct drm_crtc_helper_funcs dce_virtual_crtc_helper_funcs = { - .dpms = dce_virtual_crtc_dpms, - .mode_fixup = dce_virtual_crtc_mode_fixup, - .mode_set = dce_virtual_crtc_mode_set, - .mode_set_base = dce_virtual_crtc_set_base, - .mode_set_base_atomic = dce_virtual_crtc_set_base_atomic, - .prepare = dce_virtual_crtc_prepare, - .commit = dce_virtual_crtc_commit, - .disable = dce_virtual_crtc_disable, - .get_scanout_position = amdgpu_crtc_get_scanout_position, -}; - -static int 
dce_virtual_crtc_init(struct amdgpu_device *adev, int index) -{ - struct amdgpu_crtc *amdgpu_crtc; - - amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + - (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); - if (amdgpu_crtc == NULL) - return -ENOMEM; - - drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_virtual_crtc_funcs); - - drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); - amdgpu_crtc->crtc_id = index; - adev->mode_info.crtcs[index] = amdgpu_crtc; - - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; - amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; - drm_crtc_helper_add(&amdgpu_crtc->base, &dce_virtual_crtc_helper_funcs); - - hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer_set_expires(&amdgpu_crtc->vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD); - amdgpu_crtc->vblank_timer.function = dce_virtual_vblank_timer_handle; - hrtimer_start(&amdgpu_crtc->vblank_timer, - DCE_VIRTUAL_VBLANK_PERIOD, HRTIMER_MODE_REL); - return 0; -} - -static int dce_virtual_early_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dce_virtual_set_display_funcs(adev); - dce_virtual_set_irq_funcs(adev); - - adev->mode_info.num_hpd = 1; - adev->mode_info.num_dig = 1; - return 0; -} - -static struct drm_encoder * -dce_virtual_encoder(struct drm_connector *connector) -{ - struct drm_encoder *encoder; - - drm_connector_for_each_possible_encoder(connector, encoder) { - if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL) - return encoder; - } - - /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder) - return encoder; - - return NULL; -} - -static int dce_virtual_get_modes(struct drm_connector *connector) -{ - struct drm_device *dev = connector->dev; - struct drm_display_mode *mode = NULL; - unsigned i; - static const struct mode_size { - int w; - int h; - } common_modes[] = { - { 640, 480}, - { 720, 480}, - { 800, 600}, - { 848, 480}, - {1024, 768}, - {1152, 768}, - {1280, 720}, - {1280, 800}, - {1280, 854}, - {1280, 960}, - {1280, 1024}, - {1440, 900}, - {1400, 1050}, - {1680, 1050}, - {1600, 1200}, - {1920, 1080}, - {1920, 1200}, - {2560, 1440}, - {4096, 3112}, - {3656, 2664}, - {3840, 2160}, - {4096, 2160}, - }; - - for (i = 0; i < ARRAY_SIZE(common_modes); i++) { - mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); - drm_mode_probed_add(connector, mode); - } - - return 0; -} - -static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - -static int -dce_virtual_dpms(struct drm_connector *connector, int mode) -{ - return 0; -} - -static int -dce_virtual_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) -{ - return 0; -} - -static void dce_virtual_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); - kfree(connector); -} - -static void dce_virtual_force(struct drm_connector *connector) -{ - return; -} - -static const struct drm_connector_helper_funcs dce_virtual_connector_helper_funcs = { - .get_modes = dce_virtual_get_modes, - .mode_valid = dce_virtual_mode_valid, - .best_encoder = dce_virtual_encoder, -}; - -static const struct drm_connector_funcs dce_virtual_connector_funcs = { - .dpms = dce_virtual_dpms, - .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = 
dce_virtual_set_property, - .destroy = dce_virtual_destroy, - .force = dce_virtual_force, -}; - -static int dce_virtual_sw_init(void *handle) -{ - int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq); - if (r) - return r; - - adev_to_drm(adev)->max_vblank_count = 0; - - adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; - - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - - r = amdgpu_display_modeset_create_props(adev); - if (r) - return r; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - /* allocate crtcs, encoders, connectors */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = dce_virtual_crtc_init(adev, i); - if (r) - return r; - r = dce_virtual_connector_encoder_init(adev, i); - if (r) - return r; - } - - drm_kms_helper_poll_init(adev_to_drm(adev)); - - adev->mode_info.mode_config_initialized = true; - return 0; -} - -static int dce_virtual_sw_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - kfree(adev->mode_info.bios_hardcoded_edid); - - drm_kms_helper_poll_fini(adev_to_drm(adev)); - - drm_mode_config_cleanup(adev_to_drm(adev)); - /* clear crtcs pointer to avoid dce irq finish routine access freed data */ - memset(adev->mode_info.crtcs, 0, sizeof(adev->mode_info.crtcs[0]) * AMDGPU_MAX_CRTCS); - adev->mode_info.mode_config_initialized = false; - return 0; -} - -static int dce_virtual_hw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - dce_v6_0_disable_dce(adev); - break; -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: - dce_v8_0_disable_dce(adev); - break; -#endif - case CHIP_FIJI: - case CHIP_TONGA: - dce_v10_0_disable_dce(adev); - break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_VEGAM: - dce_v11_0_disable_dce(adev); - break; - case CHIP_TOPAZ: -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_HAINAN: -#endif - /* no DCE */ - break; - default: - break; - } - return 0; -} - -static int dce_virtual_hw_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i = 0; - - for (i = 0; i<adev->mode_info.num_crtc; i++) - if (adev->mode_info.crtcs[i]) - hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); - - return 0; -} - -static int dce_virtual_suspend(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; - - r = amdgpu_display_suspend_helper(adev); - if (r) - return r; - return dce_virtual_hw_fini(handle); -} - -static int dce_virtual_resume(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; - - r = dce_virtual_hw_init(handle); - if (r) - return r; - return amdgpu_display_resume_helper(adev); -} - -static bool dce_virtual_is_idle(void *handle) -{ - return true; -} - -static int dce_virtual_wait_for_idle(void *handle) -{ - return 0; -} - -static int dce_virtual_soft_reset(void *handle) -{ 
- return 0; -} - -static int dce_virtual_set_clockgating_state(void *handle, - enum amd_clockgating_state state) -{ - return 0; -} - -static int dce_virtual_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - return 0; -} - -static const struct amd_ip_funcs dce_virtual_ip_funcs = { - .name = "dce_virtual", - .early_init = dce_virtual_early_init, - .late_init = NULL, - .sw_init = dce_virtual_sw_init, - .sw_fini = dce_virtual_sw_fini, - .hw_init = dce_virtual_hw_init, - .hw_fini = dce_virtual_hw_fini, - .suspend = dce_virtual_suspend, - .resume = dce_virtual_resume, - .is_idle = dce_virtual_is_idle, - .wait_for_idle = dce_virtual_wait_for_idle, - .soft_reset = dce_virtual_soft_reset, - .set_clockgating_state = dce_virtual_set_clockgating_state, - .set_powergating_state = dce_virtual_set_powergating_state, -}; - -/* these are handled by the primary encoders */ -static void dce_virtual_encoder_prepare(struct drm_encoder *encoder) -{ - return; -} - -static void dce_virtual_encoder_commit(struct drm_encoder *encoder) -{ - return; -} - -static void -dce_virtual_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return; -} - -static void dce_virtual_encoder_disable(struct drm_encoder *encoder) -{ - return; -} - -static void -dce_virtual_encoder_dpms(struct drm_encoder *encoder, int mode) -{ - return; -} - -static bool dce_virtual_encoder_mode_fixup(struct drm_encoder *encoder, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - -static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs = { - .dpms = dce_virtual_encoder_dpms, - .mode_fixup = dce_virtual_encoder_mode_fixup, - .prepare = dce_virtual_encoder_prepare, - .mode_set = dce_virtual_encoder_mode_set, - .commit = dce_virtual_encoder_commit, - .disable = dce_virtual_encoder_disable, -}; - -static void dce_virtual_encoder_destroy(struct drm_encoder *encoder) -{ - drm_encoder_cleanup(encoder); - kfree(encoder); -} - -static const struct drm_encoder_funcs dce_virtual_encoder_funcs = { - .destroy = dce_virtual_encoder_destroy, -}; - -static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, - int index) -{ - struct drm_encoder *encoder; - struct drm_connector *connector; - - /* add a new encoder */ - encoder = kzalloc(sizeof(struct drm_encoder), GFP_KERNEL); - if (!encoder) - return -ENOMEM; - encoder->possible_crtcs = 1 << index; - drm_encoder_init(adev_to_drm(adev), encoder, &dce_virtual_encoder_funcs, - DRM_MODE_ENCODER_VIRTUAL, NULL); - drm_encoder_helper_add(encoder, &dce_virtual_encoder_helper_funcs); - - connector = kzalloc(sizeof(struct drm_connector), GFP_KERNEL); - if (!connector) { - kfree(encoder); - return -ENOMEM; - } - - /* add a new connector */ - drm_connector_init(adev_to_drm(adev), connector, &dce_virtual_connector_funcs, - DRM_MODE_CONNECTOR_VIRTUAL); - drm_connector_helper_add(connector, &dce_virtual_connector_helper_funcs); - connector->display_info.subpixel_order = SubPixelHorizontalRGB; - connector->interlace_allowed = false; - connector->doublescan_allowed = false; - - /* link them */ - drm_connector_attach_encoder(connector, encoder); - - return 0; -} - -static const struct amdgpu_display_funcs dce_virtual_display_funcs = { - .bandwidth_update = &dce_virtual_bandwidth_update, - .vblank_get_counter = &dce_virtual_vblank_get_counter, - .backlight_set_level = NULL, - .backlight_get_level = NULL, - .hpd_sense = &dce_virtual_hpd_sense, - 
.hpd_set_polarity = &dce_virtual_hpd_set_polarity, - .hpd_get_gpio_reg = &dce_virtual_hpd_get_gpio_reg, - .page_flip = &dce_virtual_page_flip, - .page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos, - .add_encoder = NULL, - .add_connector = NULL, -}; - -static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) -{ - adev->mode_info.funcs = &dce_virtual_display_funcs; -} - -static int dce_virtual_pageflip(struct amdgpu_device *adev, - unsigned crtc_id) -{ - unsigned long flags; - struct amdgpu_crtc *amdgpu_crtc; - struct amdgpu_flip_work *works; - - amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; - - if (crtc_id >= adev->mode_info.num_crtc) { - DRM_ERROR("invalid pageflip crtc %d\n", crtc_id); - return -EINVAL; - } - - /* IRQ could occur when in initial stage */ - if (amdgpu_crtc == NULL) - return 0; - - spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); - works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { - DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " - "AMDGPU_FLIP_SUBMITTED(%d)\n", - amdgpu_crtc->pflip_status, - AMDGPU_FLIP_SUBMITTED); - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - return 0; - } - - /* page flip completed. clean up */ - amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE; - amdgpu_crtc->pflip_works = NULL; - - /* wakeup usersapce */ - if (works->event) - drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); - - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - - drm_crtc_vblank_put(&amdgpu_crtc->base); - amdgpu_bo_unref(&works->old_abo); - kfree(works->shared); - kfree(works); - - return 0; -} - -static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer) -{ - struct amdgpu_crtc *amdgpu_crtc = container_of(vblank_timer, - struct amdgpu_crtc, vblank_timer); - struct drm_device *ddev = amdgpu_crtc->base.dev; - struct amdgpu_device *adev = drm_to_adev(ddev); - struct amdgpu_irq_src *source = adev->irq.client[AMDGPU_IRQ_CLIENTID_LEGACY].sources - [VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER]; - int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, - amdgpu_crtc->crtc_id); - - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(ddev, amdgpu_crtc->crtc_id); - dce_virtual_pageflip(adev, amdgpu_crtc->crtc_id); - } - hrtimer_start(vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD, - HRTIMER_MODE_REL); - - return HRTIMER_NORESTART; -} - -static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state) -{ - if (crtc >= adev->mode_info.num_crtc || !adev->mode_info.crtcs[crtc]) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - adev->mode_info.crtcs[crtc]->vsync_timer_enabled = state; - DRM_DEBUG("[FM]set crtc %d vblank interrupt state %d\n", crtc, state); -} - - -static int dce_virtual_set_crtc_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - if (type > AMDGPU_CRTC_IRQ_VBLANK6) - return -EINVAL; - - dce_virtual_set_crtc_vblank_interrupt_state(adev, type, state); - - return 0; -} - -static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = { - .set = dce_virtual_set_crtc_irq_state, - .process = NULL, -}; - -static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) -{ - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1; - adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs; -} - -const struct amdgpu_ip_block_version dce_virtual_ip_block = -{ - .type = 
	AMD_IP_BLOCK_TYPE_DCE,
-	.major = 1,
-	.minor = 0,
-	.rev = 0,
-	.funcs = &dce_virtual_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 0d8459d63bac..14514a145c17 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev)
 	adev->df.hash_status.hash_2m = false;
 	adev->df.hash_status.hash_1g = false;
-	if (adev->asic_type != CHIP_ARCTURUS)
-		return;
-
-	/* encoding for hash-enabled on Arcturus */
-	if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+	/* encoding for hash-enabled on Arcturus and Aldebaran */
+	if ((adev->asic_type == CHIP_ARCTURUS &&
+	     adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
+	    (adev->asic_type == CHIP_ALDEBARAN &&
+	     adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
 		tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
 		adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
 						DF_CS_UMC_AON0_DfGlobalCtrl,
@@ -277,8 +277,14 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
 {
 	u32 tmp;
-	tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
-	tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+	if (adev->asic_type == CHIP_ALDEBARAN) {
+		tmp = RREG32_SOC15(DF, 0, mmDF_GCM_AON0_DramMegaBaseAddress0);
+		tmp &=
+		ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+	} else {
+		tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+		tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+	}
 	tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
 	return tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7ce76a6b3a35..16dbe593cba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -47,7 +47,7 @@
 #include "gfx_v10_0.h"
 #include "nbio_v2_3.h"
-/**
+/*
 * Navi10 has two graphic rings to share each graphic pipe.
 * 1. Primary ring
 * 2.
Async ring @@ -56,6 +56,10 @@ #define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1 #define GFX10_MEC_HPD_SIZE 2048 +#define RLCG_VFGATE_DISABLED 0x4000000 +#define RLCG_WRONG_OPERATION_TYPE 0x2000000 +#define RLCG_NOT_IN_RANGE 0x1000000 + #define F32_CE_PROGRAM_RAM_SIZE 65536 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -173,11 +177,17 @@ #define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5 +#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1 + #define GFX_RLCG_GC_WRITE_OLD (0x8 << 28) #define GFX_RLCG_GC_WRITE (0x0 << 28) #define GFX_RLCG_GC_READ (0x1 << 28) #define GFX_RLCG_MMHUB_WRITE (0x2 << 28) +#define RLCG_ERROR_REPORT_ENABLED(adev) \ + (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -232,6 +242,53 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_ce.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_pfp.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_me.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_mec.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_mec2.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/yellow_carp_ce.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_pfp.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_me.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); + +static const struct soc15_reg_golden golden_settings_gc_10_0[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), + /* TA_GRAD_ADJ_UCONFIG -> TA_GRAD_ADJ */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382), + /* VGT_TF_RING_SIZE_UMD -> VGT_TF_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2262c24e), + /* VGT_HS_OFFCHIP_PARAM_UMD -> VGT_HS_OFFCHIP_PARAM */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226cc24f), + /* VGT_TF_MEMORY_BASE_UMD -> VGT_TF_MEMORY_BASE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226ec250), + /* VGT_TF_MEMORY_BASE_HI_UMD -> VGT_TF_MEMORY_BASE_HI */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2278c261), + /* VGT_ESGS_RING_SIZE_UMD -> VGT_ESGS_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2232c240), + /* VGT_GSVS_RING_SIZE_UMD -> VGT_GSVS_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2233c241), +}; + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -1425,38 +1482,36 @@ static const struct 
soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000) }; -static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, uint32_t *flag, bool write) -{ - /* always programed by rlcg, only for gc */ - if (offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI) || - offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO) || - offset == SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH) || - offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL) || - offset == SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX) || - offset == SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)) { - if (!amdgpu_sriov_reg_indirect_gc(adev)) - *flag = GFX_RLCG_GC_WRITE_OLD; - else - *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ; +static bool gfx_v10_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, + int write, u32 *rlcg_flag) +{ + switch (hwip) { + case GC_HWIP: + if (amdgpu_sriov_reg_indirect_gc(adev)) { + *rlcg_flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ; - return true; - } + return true; + /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */ + } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ)) { + *rlcg_flag = GFX_RLCG_GC_WRITE_OLD; - /* currently support gc read/write, mmhub write */ - if (offset >= SOC15_REG_OFFSET(GC, 0, mmSDMA0_DEC_START) && - offset <= SOC15_REG_OFFSET(GC, 0, mmRLC_GTS_OFFSET_MSB)) { - if (amdgpu_sriov_reg_indirect_gc(adev)) - *flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ; - else - return false; - } else { - if (amdgpu_sriov_reg_indirect_mmhub(adev)) - *flag = GFX_RLCG_MMHUB_WRITE; - else - return false; + return true; + } + + break; + case MMHUB_HWIP: + if (amdgpu_sriov_reg_indirect_mmhub(adev) && + (acc_flags & AMDGPU_REGS_RLC) && write) { + *rlcg_flag = GFX_RLCG_MMHUB_WRITE; + return true; + } + + break; + default: + DRM_DEBUG("Not program register by RLCG\n"); } - return true; + return false; } static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag) @@ -1471,6 +1526,7 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 uint32_t i = 0; uint32_t retries = 50000; u32 ret = 0; + u32 tmp; scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4; @@ -1480,8 +1536,15 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4; scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4; - spare_int = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4; + + if (adev->asic_type >= CHIP_SIENNA_CICHLID) { + spare_int = adev->rmmio + + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX] + + mmRLC_SPARE_INT_0_Sienna_Cichlid) * 4; + } else { + spare_int = adev->rmmio + + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4; + } grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; @@ -1497,9 +1560,8 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 writel(v, scratch_reg0); writel(offset | flag, scratch_reg1); writel(1, spare_int); - for (i = 0; i < retries; i++) { - u32 tmp; + for (i = 0; i < retries; i++) { tmp = readl(scratch_reg1); if (!(tmp & flag)) break; @@ -1507,8 
+1569,19 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 udelay(10); } - if (i >= retries) - pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); + if (i >= retries) { + if (RLCG_ERROR_REPORT_ENABLED(adev)) { + if (tmp & RLCG_VFGATE_DISABLED) + pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset); + else if (tmp & RLCG_WRONG_OPERATION_TYPE) + pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset); + else if (tmp & RLCG_NOT_IN_RANGE) + pr_err("The register is not in range, program reg:0x%05x failed!\n", offset); + else + pr_err("Unknown error type, program reg:0x%05x failed!\n", offset); + } else + pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset); + } } ret = readl(scratch_reg0); @@ -1516,36 +1589,34 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 return ret; } -static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 flag) +static void gfx_v10_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip) { - uint32_t rlcg_flag; + u32 rlcg_flag; - if (amdgpu_sriov_fullaccess(adev) && - gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 1)) { + if (!amdgpu_sriov_runtime(adev) && + gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) { gfx_v10_rlcg_rw(adev, offset, value, rlcg_flag); - return; } - if (flag & AMDGPU_REGS_NO_KIQ) + + if (acc_flags & AMDGPU_REGS_NO_KIQ) WREG32_NO_KIQ(offset, value); else WREG32(offset, value); } -static u32 gfx_v10_rlcg_rreg(struct amdgpu_device *adev, u32 offset, u32 flag) +static u32 gfx_v10_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) { - uint32_t rlcg_flag; + u32 rlcg_flag; - if (amdgpu_sriov_fullaccess(adev) && - gfx_v10_is_rlcg_rw(adev, offset, &rlcg_flag, 0)) + if (!amdgpu_sriov_runtime(adev) && + gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag)) return gfx_v10_rlcg_rw(adev, offset, 0, rlcg_flag); - if (flag & AMDGPU_REGS_NO_KIQ) + if (acc_flags & AMDGPU_REGS_NO_KIQ) return RREG32_NO_KIQ(offset); else return RREG32(offset); - - return 0; } static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = @@ -3280,6 +3351,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; @@ -3359,6 +3431,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), @@ -3367,6 +3440,30 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), }; +static const struct 
soc15_reg_golden golden_settings_gc_10_3_3[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), @@ -3401,17 +3498,92 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) }; +static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xb0000ff0, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff000000, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX,0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_FAST_CLKS, 0x3fffffff, 0x0000493e), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x3c000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0xa0000000, 0xa0000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x00008000, 0x003c8014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_DRAM_BURST_CTRL, 0x00000010, 0x00000017), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xd8d8d8d8), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000003), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00009d00, 0x00008500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xffffffff, 0x000fffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_DRAM_BURST_CTRL, 0x00000010, 0x00000017), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xfcfcfcfc, 0xd8d8d8d8), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77707770, 0x21302130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77707770, 0x21302130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xfc02002f, 0x9402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00002188, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x08000009, 0x08000009), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xcc3fcc03, 0x842a4c02), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000000f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffff3109, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x00030008, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) +/* TODO: pending on golden setting value of gb address config */ +#define CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN 0x00100044 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev); @@ -3621,11 +3793,29 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_3_vangogh, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh)); break; + case CHIP_YELLOW_CARP: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_3, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_3)); + break; case CHIP_DIMGREY_CAVEFISH: soc15_program_register_sequence(adev, golden_settings_gc_10_3_4, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4)); break; + case CHIP_BEIGE_GOBY: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_5, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5)); + break; + case CHIP_CYAN_SKILLFISH: + soc15_program_register_sequence(adev, + golden_settings_gc_10_0, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_0_cyan_skillfish, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); + break; default: break; } @@ -3799,6 +3989,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_NAVI14: + case CHIP_CYAN_SKILLFISH: if ((adev->gfx.me_fw_version >= 0x00000046) && (adev->gfx.me_feature_version >= 27) && (adev->gfx.pfp_fw_version >= 0x00000068) && @@ -3811,6 +4002,8 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.cp_fw_write_wait = true; break; default: @@ -3887,7 +4080,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[40]; - char wks[10]; + char *wks = ""; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -3900,7 +4093,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); - memset(wks, 0, sizeof(wks)); switch (adev->asic_type) { case CHIP_NAVI10: chip_name = "navi10"; @@ -3909,7 +4101,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) chip_name = "navi14"; if (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)) - snprintf(wks, sizeof(wks), "_wks"); + wks = "_wks"; break; case CHIP_NAVI12: chip_name = "navi12"; @@ -3926,6 +4118,18 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: chip_name = "dimgrey_cavefish"; break; + case CHIP_BEIGE_GOBY: + 
chip_name = "beige_goby"; + break; + case CHIP_YELLOW_CARP: + chip_name = "yellow_carp"; + break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + chip_name = "cyan_skillfish2"; + else + chip_name = "cyan_skillfish"; + break; default: BUG(); } @@ -4417,6 +4621,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -4494,6 +4699,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4503,6 +4710,14 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config_fields.num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); break; + case CHIP_CYAN_SKILLFISH: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN; + break; default: BUG(); break; @@ -4607,6 +4822,7 @@ static int gfx_v10_0_sw_init(void *handle) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4618,6 +4834,8 @@ static int gfx_v10_0_sw_init(void *handle) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4850,7 +5068,8 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; - if ((adev->asic_type == CHIP_SIENNA_CICHLID) && + if (((adev->asic_type == CHIP_SIENNA_CICHLID) || + (adev->asic_type == CHIP_YELLOW_CARP)) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); @@ -4985,47 +5204,44 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) 4 + /* RMI */ 1); /* SQG */ - if (adev->asic_type == CHIP_NAVI10 || - adev->asic_type == CHIP_NAVI14 || - adev->asic_type == CHIP_NAVI12) { - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); - wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); - /* - * Set corresponding TCP bits for the inactive WGPs in - * GCRD_SA_TARGETS_DISABLE - */ - gcrd_targets_disable_tcp = 0; - /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ - utcl_invreq_disable = 0; - - for (k = 0; k < max_wgp_per_sh; k++) { - if (!(wgp_active_bitmap & (1 << k))) { - gcrd_targets_disable_tcp |= 3 << (2 * k); - utcl_invreq_disable |= (3 << (2 * k)) 
| - (3 << (2 * (max_wgp_per_sh + k))); - } + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + /* + * Set corresponding TCP bits for the inactive WGPs in + * GCRD_SA_TARGETS_DISABLE + */ + gcrd_targets_disable_tcp = 0; + /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ + utcl_invreq_disable = 0; + + for (k = 0; k < max_wgp_per_sh; k++) { + if (!(wgp_active_bitmap & (1 << k))) { + gcrd_targets_disable_tcp |= 3 << (2 * k); + gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2)); + utcl_invreq_disable |= (3 << (2 * k)) | + (3 << (2 * (max_wgp_per_sh + k))); } - - tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); - /* only override TCP & SQC bits */ - tmp &= 0xffffffff << (4 * max_wgp_per_sh); - tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); - WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); - - tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); - /* only override TCP bits */ - tmp &= 0xffffffff << (2 * max_wgp_per_sh); - tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); - WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); } - } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); + tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (4 * max_wgp_per_sh)); + tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); + WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (3 * max_wgp_per_sh)); + tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); + WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); + } } + + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); } static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@ -5180,10 +5396,10 @@ static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev) uint32_t tmp; /* enable Save Restore Machine */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmRLC_SRM_CNTL); tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); + WREG32_SOC15(GC, 0, mmRLC_SRM_CNTL, tmp); } static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev) @@ -5218,7 +5434,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) { int r; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + adev->psp.autoload_supported) { r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) @@ -5278,7 +5495,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) int ret; RLC_TABLE_OF_CONTENT *rlc_toc; - ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE, + ret = amdgpu_bo_create_reserved(adev, adev->psp.toc.size_bytes, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->gfx.rlc.rlc_toc_bo, &adev->gfx.rlc.rlc_toc_gpu_addr, @@ -5289,7 +5506,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) } /* Copy toc from psp sos fw to rlc toc buffer */ - memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size); + memcpy(adev->gfx.rlc.rlc_toc_buf, 
adev->psp.toc.start_addr, adev->psp.toc.size_bytes); rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf; while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) && @@ -6125,6 +6342,8 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -6260,6 +6479,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -6272,6 +6493,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -6368,6 +6591,8 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -7082,6 +7307,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); @@ -7095,6 +7321,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) } break; case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: return true; default: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); @@ -7116,6 +7343,9 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) { uint32_t data; + if (amdgpu_sriov_vf(adev)) + return; + /* initialize cam_index to 0 * index will auto-inc after each data writting */ WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); @@ -7125,6 +7355,8 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -7288,7 +7520,10 @@ static int gfx_v10_0_hw_init(void *handle) * init golden registers and rlc resume may override some registers, * reconfig them here */ - gfx_v10_0_tcp_harvest(adev); + if (adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12) + gfx_v10_0_tcp_harvest(adev); r = gfx_v10_0_cp_resume(adev); if (r) @@ -7334,7 +7569,7 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - if (!adev->in_pci_err_recovery) { + if (!adev->no_hw_access) { #ifndef BRING_UP_DEBUG if (amdgpu_async_gfx_ring) { r = gfx_v10_0_kiq_disable_kgq(adev); @@ -7349,9 +7584,15 @@ static int gfx_v10_0_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) 
{ gfx_v10_0_cp_gfx_enable(adev, false); /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ - tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); - tmp &= 0xffffff00; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + if (adev->asic_type >= CHIP_SIENNA_CICHLID) { + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); + tmp &= 0xffffff00; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); + } else { + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + } return 0; } @@ -7434,6 +7675,8 @@ static int gfx_v10_0_soft_reset(void *handle) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, @@ -7481,22 +7724,30 @@ static int gfx_v10_0_soft_reset(void *handle) static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { - uint64_t clock; + uint64_t clock, clock_lo, clock_hi, hi_check; - amdgpu_gfx_off_ctrl(adev, false); - mutex_lock(&adev->gfx.gpu_clock_mutex); switch (adev->asic_type) { case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) | ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); break; default: - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. 
+ */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); break; } - mutex_unlock(&adev->gfx.gpu_clock_mutex); - amdgpu_gfx_off_ctrl(adev, true); return clock; } @@ -7537,12 +7788,15 @@ static int gfx_v10_0_early_init(void *handle) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: @@ -7599,6 +7853,8 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7633,6 +7889,8 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -7646,6 +7904,9 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) + return; + /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 0 - Disable some blocks' MGCG */ @@ -7660,6 +7921,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) @@ -7682,13 +7944,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } - } else { + } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | - RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); @@ -7714,22 +7978,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS))) + return; + /* Enable 3D CGCG/CGLS */ - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + if (enable) { /* write cmd to clear cgcg/cgls ov */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, 
data); + /* enable 3Dcgcg FSM(0x0000363f) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7742,9 +8018,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, } else { /* Disable CGCG/CGLS */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + /* disable cgcg, cgls should be disabled */ - data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7756,25 +8037,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS))) + return; + + if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; - else - data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7786,8 +8077,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); } else { def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ - data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7799,7 +8096,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) { + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + 
+ if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset FGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; @@ -7830,6 +8130,97 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, } } +static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + uint32_t reg_idx = 0; + uint32_t i; + + const uint32_t tcp_ctrl_regs[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG + }; + + const uint32_t tcp_ctrl_regs_nv12[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + }; + + const uint32_t sm_ctlr_regs[] = { + mmCGTS_SA0_QUAD0_SM_CTRL_REG, + mmCGTS_SA0_QUAD1_SM_CTRL_REG, + mmCGTS_SA1_QUAD0_SM_CTRL_REG, + mmCGTS_SA1_QUAD1_SM_CTRL_REG + }; + + if (adev->asic_type == CHIP_NAVI12) { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs_nv12[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } else { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } + + for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] + + sm_ctlr_regs[i]; + reg_data = RREG32(reg_idx); + reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK; + reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT; + WREG32(reg_idx, reg_data); + } +} + static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -7846,6 +8237,10 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v10_0_update_3d_clock_gating(adev, enable); /* === 
CGCG + CGLS === */ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + + if ((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12)) + gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS * === CGCG + CGLS === @@ -7875,12 +8270,12 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) { u32 reg, data; - + /* not for *_SOC15 */ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); else - data = RREG32(reg); + data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; @@ -7935,12 +8330,23 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) * in refclk count. Note that RLC FW is modified to take 16 bits from * RLC_PG_DELAY_3[15:0] as the hysteresis instead of just 8 bits. * - * The recommendation from RLC team is setting RLC_PG_DELAY_3 to 200us(0x4E20) - * as part of CGPG enablement starting point. + * The recommendation from RLC team is setting RLC_PG_DELAY_3 to 200us as part) + * of CGPG enablement starting point. + * Power/performance team will optimize it and might give a new value later. */ - if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && adev->asic_type == CHIP_VANGOGH) { - data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; - WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { + switch (adev->asic_type) { + case CHIP_VANGOGH: + data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; + WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); + break; + case CHIP_YELLOW_CARP: + data = 0x1388 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; + WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); + break; + default: + break; + } } } @@ -7979,8 +8385,8 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { .reset = gfx_v10_0_rlc_reset, .start = gfx_v10_0_rlc_start, .update_spm_vmid = gfx_v10_0_update_spm_vmid, - .rlcg_wreg = gfx_v10_rlcg_wreg, - .rlcg_rreg = gfx_v10_rlcg_rreg, + .sriov_wreg = gfx_v10_sriov_wreg, + .sriov_rreg = gfx_v10_sriov_rreg, .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; @@ -8000,9 +8406,11 @@ static int gfx_v10_0_set_powergating_state(void *handle, case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: amdgpu_gfx_off_ctrl(adev, enable); break; case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: gfx_v10_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -8028,6 +8436,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -8620,16 +9030,16 @@ gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cp_int_cntl = RREG32(cp_int_cntl_reg); + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 0); - WREG32(cp_int_cntl_reg, cp_int_cntl); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - cp_int_cntl = RREG32(cp_int_cntl_reg); + 
cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 1); - WREG32(cp_int_cntl_reg, cp_int_cntl); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); break; default: break; @@ -8673,16 +9083,16 @@ static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); break; default: break; @@ -8878,20 +9288,20 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev, GENERIC2_INT_ENABLE, 0); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - tmp = RREG32(target); + tmp = RREG32_SOC15_IP(GC, target); tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, GENERIC2_INT_ENABLE, 0); - WREG32(target, tmp); + WREG32_SOC15_IP(GC, target, tmp); } else { tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, GENERIC2_INT_ENABLE, 1); WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - tmp = RREG32(target); + tmp = RREG32_SOC15_IP(GC, target); tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, GENERIC2_INT_ENABLE, 1); - WREG32(target, tmp); + WREG32_SOC15_IP(GC, target, tmp); } break; default: @@ -9134,13 +9544,16 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs_sriov; break; default: @@ -9176,17 +9589,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device * static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) { - u32 data, wgp_bitmask; - data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + u32 disabled_mask = + ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + u32 efuse_setting = 0; + u32 vbios_setting = 0; + + efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); + efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; - data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - wgp_bitmask = - amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + disabled_mask |= efuse_setting | vbios_setting; - return (~data) & wgp_bitmask; + return (~disabled_mask); } static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) @@ -9223,7 +9641,8 
@@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; - if ((adev->asic_type == CHIP_SIENNA_CICHLID) && + if (((adev->asic_type == CHIP_SIENNA_CICHLID) || + (adev->asic_type == CHIP_YELLOW_CARP)) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; mask = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 3a8d52a54873..6a8dadea40f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3027,6 +3027,7 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index c35fdd2ef2d4..37b4a3db6360 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2116,7 +2116,7 @@ error_free_scratch: } /** - * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp + * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp * * @ring: amdgpu_ring structure holding ring information * @@ -2242,7 +2242,7 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * IB stuff */ /** - * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring + * gfx_v7_0_ring_emit_ib_gfx - emit an IB (Indirect Buffer) on the ring * * @ring: amdgpu_ring structure holding ring information * @job: job to retrieve vmid from @@ -3196,7 +3196,7 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) } /** - * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP + * gfx_v7_0_ring_emit_pipeline_sync - cik vm flush using the CP * * @ring: the ring to emit the commands to * @@ -4198,6 +4198,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c26e06059466..e0302c23e9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5279,6 +5279,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 516467e962b7..603c259b073b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -734,7 +734,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, }; -static void gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag) +static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag) { static void *scratch_reg0; static void *scratch_reg1; @@ -787,15 +787,17 @@ static void gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 } -static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag) +static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset, + u32 v, u32 acc_flags, u32 hwip) { - if (amdgpu_sriov_fullaccess(adev)) { - gfx_v9_0_rlcg_rw(adev, offset, v, flag); + if ((acc_flags & AMDGPU_REGS_RLC) && + amdgpu_sriov_fullaccess(adev)) { + gfx_v9_0_rlcg_w(adev, offset, v, acc_flags); return; } - if (flag & AMDGPU_REGS_NO_KIQ) + if (acc_flags & AMDGPU_REGS_NO_KIQ) WREG32_NO_KIQ(offset, v); else WREG32(offset, v); @@ -1293,6 +1295,16 @@ static bool is_raven_kicker(struct amdgpu_device *adev) return false; } +static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) +{ + if ((adev->asic_type == CHIP_RENOIR) && + (adev->gfx.me_fw_version >= 0x000000a5) && + (adev->gfx.me_feature_version >= 52)) + return true; + else + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) @@ -2078,6 +2090,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -3673,7 +3686,16 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + /* If GC has entered CGPG, ringing doorbell > first page + * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to + * workaround this issue. And this change has to align with firmware + * update. 
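
The doorbell-range workaround described in the comment above (and applied just below) is gated on a firmware check: only a Renoir part whose CP ME firmware reports version >= 0xa5 and feature version >= 52 (see check_if_enlarge_doorbell_range() in this hunk) gets the widened CP_MEC_DOORBELL_RANGE_UPPER; otherwise the original user-queue-bounded value is kept. The following is a minimal stand-alone C sketch of that decision; struct doorbell_model and the sample numbers in main() are hypothetical illustration values, not amdgpu structures.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the few device fields the check consults. */
struct doorbell_model {
	bool     is_renoir;
	uint32_t me_fw_version;
	uint32_t me_feature_version;
	uint32_t doorbell_size;   /* doorbell BAR size in bytes */
	uint32_t userqueue_end;   /* last user-queue doorbell index */
};

/* Mirrors check_if_enlarge_doorbell_range(): Renoir plus new-enough ME fw. */
static bool enlarge_doorbell_range(const struct doorbell_model *d)
{
	return d->is_renoir &&
	       d->me_fw_version >= 0x000000a5 &&
	       d->me_feature_version >= 52;
}

/* Value that would be programmed into CP_MEC_DOORBELL_RANGE_UPPER. */
static uint32_t mec_doorbell_range_upper(const struct doorbell_model *d)
{
	if (enlarge_doorbell_range(d))
		return d->doorbell_size - 4;        /* cover the whole doorbell BAR */
	return (d->userqueue_end * 2) << 2;         /* original upper bound */
}

int main(void)
{
	struct doorbell_model d = { true, 0xa6, 52, 0x2000, 8 }; /* made-up sample */

	printf("CP_MEC_DOORBELL_RANGE_UPPER = 0x%x\n", mec_doorbell_range_upper(&d));
	return 0;
}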
+ */ + if (check_if_enlarge_doorbell_range(adev)) + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell.size - 4)); + else + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, (adev->doorbell_index.userqueue_end * 2) << 2); } @@ -3937,7 +3959,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) { u32 tmp; - if (adev->asic_type != CHIP_ARCTURUS) + if (adev->asic_type != CHIP_ARCTURUS && + adev->asic_type != CHIP_ALDEBARAN) return; tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); @@ -4559,8 +4582,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) if (!ring->sched.ready) return 0; - if (adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_ALDEBARAN) { + if (adev->asic_type == CHIP_ARCTURUS) { vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); vgpr_init_regs_ptr = vgpr_init_regs_arcturus; @@ -4745,7 +4767,11 @@ static int gfx_v9_0_ecc_late_init(void *handle) } /* requires IBs so do in late init after IB pool is initialized */ - r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (adev->asic_type == CHIP_ALDEBARAN) + r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); + else + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) return r; @@ -5125,7 +5151,7 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .reset = gfx_v9_0_rlc_reset, .start = gfx_v9_0_rlc_start, .update_spm_vmid = gfx_v9_0_update_spm_vmid, - .rlcg_wreg = gfx_v9_0_rlcg_wreg, + .sriov_wreg = gfx_v9_0_sriov_wreg, .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index a30c7c10cd9a..00a2b36a24b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "soc15.h" +#include "soc15d.h" #include "gc/gc_9_4_2_offset.h" #include "gc/gc_9_4_2_sh_mask.h" @@ -31,6 +32,11 @@ #include "amdgpu_ras.h" #include "amdgpu_gfx.h" +#define SE_ID_MAX 8 +#define CU_ID_MAX 16 +#define SIMD_ID_MAX 4 +#define WAVE_ID_MAX 10 + enum gfx_v9_4_2_utc_type { VML2_MEM, VML2_WALKER_MEM, @@ -79,6 +85,634 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20), }; +/* + * This shader is used to clear VGPRS and LDS, and also write the input + * pattern into the write back buffer, which will be used by driver to + * check whether all SIMDs have been covered. 
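
The coverage check described in the comment above reduces to a flat write-back array with one 32-bit slot per (shader engine, CU, SIMD, wave): every wave of the init shader stamps the dispatch pattern into its slot, and the driver then counts matching slots (the real check is gfx_v9_4_2_wait_for_waves_assigned() further down in this file). Below is a compilable toy model of that bookkeeping, using the CU/SIMD/WAVE *_ID_MAX limits defined earlier in this file; the SE count and the contents of main() are made up for illustration.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CU_ID_MAX    16
#define SIMD_ID_MAX   4
#define WAVE_ID_MAX  10

/* Toy coverage check: wb[] holds one slot per (se, cu, simd, wave); a wave
 * that executed the init shader wrote `pattern` into its slot.  Count how
 * many of the wave slots selected by `wave_mask` carry the pattern. */
static unsigned count_covered_waves(const uint32_t *wb, unsigned num_se,
				    uint32_t wave_mask, uint32_t pattern)
{
	unsigned se, cu, simd, wave, hits = 0;
	size_t off = 0;

	for (se = 0; se < num_se; se++)
		for (cu = 0; cu < CU_ID_MAX; cu++)
			for (simd = 0; simd < SIMD_ID_MAX; simd++)
				for (wave = 0; wave < WAVE_ID_MAX; wave++, off++)
					if ((wave_mask & (1u << wave)) &&
					    wb[off] == pattern)
						hits++;
	return hits;
}

int main(void)
{
	enum { NUM_SE = 2 };   /* made-up SE count */
	static uint32_t wb[NUM_SE * CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX];

	memset(wb, 0, sizeof(wb));
	wb[0] = 0xa;           /* pretend wave 0 of SE0/CU0/SIMD0 ran the shader */
	printf("covered waves: %u\n", count_covered_waves(wb, NUM_SE, 0x1, 0xa));
	return 0;
}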
+*/ +static const u32 vgpr_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000, + 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003, + 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006, + 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009, + 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c, + 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f, + 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012, + 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015, + 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018, + 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b, + 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e, + 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021, + 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024, + 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027, + 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a, + 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d, + 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030, + 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033, + 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036, + 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039, + 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c, + 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f, + 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042, + 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045, + 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048, + 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b, + 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e, + 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051, + 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054, + 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057, + 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a, + 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d, + 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060, + 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063, + 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066, + 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069, + 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c, + 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f, + 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072, + 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075, + 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078, + 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b, + 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e, + 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081, + 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084, + 0x18000080, 
0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087, + 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a, + 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d, + 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090, + 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093, + 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096, + 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099, + 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c, + 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f, + 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2, + 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5, + 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8, + 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab, + 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae, + 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1, + 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4, + 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7, + 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba, + 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd, + 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0, + 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3, + 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6, + 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9, + 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc, + 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf, + 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2, + 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5, + 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8, + 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db, + 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de, + 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1, + 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4, + 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7, + 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea, + 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed, + 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0, + 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3, + 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6, + 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9, + 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc, + 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff, + 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280, + 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001, + 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000, + 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201, + 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8, + 0xbf810000, +}; + +const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { + { 
SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +/* + * The below shaders are used to clear SGPRS, and also write the input + * pattern into the write back buffer. The first two dispatch should be + * scheduled simultaneously which make sure that all SGPRS could be + * allocated, so the dispatch 1 need check write back buffer before scheduled, + * make sure that waves of dispatch 0 are all dispacthed to all simds + * balanced. both dispatch 0 and dispatch 1 should be halted until all waves + * are dispatched, and then driver write a pattern to the shared memory to make + * all waves continue. +*/ +static const u32 sgpr112_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080, + 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080, + 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080, + 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080, + 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080, + 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080, + 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080, + 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080, + 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080, + 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080, + 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080, + 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080, + 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080, + 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080, + 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080, + 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080, + 0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080, + 0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080, + 0xbf810000 +}; + +const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { 
SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static const u32 sgpr96_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080, + 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080, + 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080, + 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080, + 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080, + 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080, + 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080, + 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080, + 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080, + 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080, + 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080, + 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080, + 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080, + 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080, + 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080, + 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080, + 0xbf810000, +}; + +const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 
0xffffffff }, +}; + +/* + * This shader is used to clear the uninitiated sgprs after the above + * two dispatches, because of hardware feature, dispath 0 couldn't clear + * top hole sgprs. Therefore need 4 waves per SIMD to cover these sgprs +*/ +static const u32 sgpr64_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080, + 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080, + 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080, + 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080, + 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080, + 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080, + 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080, + 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080, + 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080, + 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080, + 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000, +}; + +const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + const u32 *shader_ptr, u32 shader_size, + const struct soc15_reg_entry *init_regs, u32 regs_size, + u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern, + struct dma_fence **fence_ptr) +{ + int r, i; + uint32_t total_size, shader_offset; + u64 gpu_addr; + + total_size = (regs_size * 3 + 4 + 5 + 5) * 4; + total_size = ALIGN(total_size, 256); + shader_offset = total_size; + total_size += ALIGN(shader_size, 256); + + /* allocate an indirect buffer to put the commands in */ + memset(ib, 0, sizeof(*ib)); + r = amdgpu_ib_get(adev, NULL, total_size, + AMDGPU_IB_POOL_DIRECT, ib); + if (r) { + dev_err(adev->dev, "failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < shader_size/sizeof(u32); i++) + ib->ptr[i + (shader_offset / 4)] = shader_ptr[i]; + + /* init the ib length to 0 */ + ib->length_dw = 0; + + /* write the register state for the compute dispatch */ + for (i = 0; i 
< regs_size; i++) { + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib->ptr[ib->length_dw++] = init_regs[i].reg_value; + } + + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8; + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr); + ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr); + + /* write the wb buffer address */ + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3); + ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) + - PACKET3_SET_SH_REG_START; + ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr); + ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr); + ib->ptr[ib->length_dw++] = pattern; + + /* write dispatch packet */ + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib->ptr[ib->length_dw++] = compute_dim_x; /* x */ + ib->ptr[ib->length_dw++] = 1; /* y */ + ib->ptr[ib->length_dw++] = 1; /* z */ + ib->ptr[ib->length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* shedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr); + if (r) { + dev_err(adev->dev, "ib submit failed (%d).\n", r); + amdgpu_ib_free(adev, ib, NULL); + } + return r; +} + +static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr) +{ + uint32_t se, cu, simd, wave; + uint32_t offset = 0; + char *str; + int size; + + str = kmalloc(256, GFP_KERNEL); + if (!str) + return; + + dev_dbg(adev->dev, "wave assignment:\n"); + + for (se = 0; se < adev->gfx.config.max_shader_engines; se++) { + for (cu = 0; cu < CU_ID_MAX; cu++) { + memset(str, 0, 256); + size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu); + for (simd = 0; simd < SIMD_ID_MAX; simd++) { + size += sprintf(str + size, "["); + for (wave = 0; wave < WAVE_ID_MAX; wave++) { + size += sprintf(str + size, "%x", wb_ptr[offset]); + offset++; + } + size += sprintf(str + size, "] "); + } + dev_dbg(adev->dev, "%s\n", str); + } + } + + kfree(str); +} + +static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev, + uint32_t *wb_ptr, uint32_t mask, + uint32_t pattern, uint32_t num_wave, bool wait) +{ + uint32_t se, cu, simd, wave; + uint32_t loop = 0; + uint32_t wave_cnt; + uint32_t offset; + + do { + wave_cnt = 0; + offset = 0; + + for (se = 0; se < adev->gfx.config.max_shader_engines; se++) + for (cu = 0; cu < CU_ID_MAX; cu++) + for (simd = 0; simd < SIMD_ID_MAX; simd++) + for (wave = 0; wave < WAVE_ID_MAX; wave++) { + if (((1 << wave) & mask) && + (wb_ptr[offset] == pattern)) + wave_cnt++; + + offset++; + } + + if (wave_cnt == num_wave) + return 0; + + mdelay(1); + } while (++loop < 2000 && wait); + + dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n", + wave_cnt, num_wave); + + gfx_v9_4_2_log_wave_assignment(adev, wb_ptr); + + return -EBADSLT; +} + +static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev) +{ + int r; + int wb_size = adev->gfx.config.max_shader_engines * + CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX; + struct amdgpu_ib wb_ib; + struct amdgpu_ib disp_ibs[3]; + struct dma_fence *fences[3]; + u32 pattern[3] = { 0x1, 0x5, 0xa }; + + /* bail if the compute ring is not ready */ + if (!adev->gfx.compute_ring[0].sched.ready || + 
!adev->gfx.compute_ring[1].sched.ready) + return 0; + + /* allocate the write-back buffer from IB */ + memset(&wb_ib, 0, sizeof(wb_ib)); + r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t), + AMDGPU_IB_POOL_DIRECT, &wb_ib); + if (r) { + dev_err(adev->dev, "failed to get ib (%d) for wb\n", r); + return r; + } + memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t)); + + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[0], + &disp_ibs[0], + sgpr112_init_compute_shader_aldebaran, + sizeof(sgpr112_init_compute_shader_aldebaran), + sgpr112_init_regs_aldebaran, + ARRAY_SIZE(sgpr112_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern[0], &fences[0]); + if (r) { + dev_err(adev->dev, "failed to clear first 224 sgprs\n"); + goto pro_end; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b11, + pattern[0], + adev->gfx.cu_info.number * SIMD_ID_MAX * 2, + true); + if (r) { + dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n"); + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + goto disp0_failed; + } + + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[1], + &disp_ibs[1], + sgpr96_init_compute_shader_aldebaran, + sizeof(sgpr96_init_compute_shader_aldebaran), + sgpr96_init_regs_aldebaran, + ARRAY_SIZE(sgpr96_init_regs_aldebaran), + adev->gfx.cu_info.number * 2, + wb_ib.gpu_addr, pattern[1], &fences[1]); + if (r) { + dev_err(adev->dev, "failed to clear next 576 sgprs\n"); + goto disp0_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b11111100, + pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6, + true); + if (r) { + dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n"); + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + goto disp1_failed; + } + + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(fences[0], false); + if (r) { + dev_err(adev->dev, "timeout to clear first 224 sgprs\n"); + goto disp1_failed; + } + + r = dma_fence_wait(fences[1], false); + if (r) { + dev_err(adev->dev, "timeout to clear first 576 sgprs\n"); + goto disp1_failed; + } + + memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t)); + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[0], + &disp_ibs[2], + sgpr64_init_compute_shader_aldebaran, + sizeof(sgpr64_init_compute_shader_aldebaran), + sgpr64_init_regs_aldebaran, + ARRAY_SIZE(sgpr64_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern[2], &fences[2]); + if (r) { + dev_err(adev->dev, "failed to clear first 256 sgprs\n"); + goto disp1_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b1111, + pattern[2], + adev->gfx.cu_info.number * SIMD_ID_MAX * 4, + true); + if (r) { + dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n"); + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + goto disp2_failed; + } + + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + + r = dma_fence_wait(fences[2], false); + if (r) { + dev_err(adev->dev, "timeout to clear first 256 sgprs\n"); + goto disp2_failed; + } + +disp2_failed: + amdgpu_ib_free(adev, &disp_ibs[2], NULL); + dma_fence_put(fences[2]); +disp1_failed: + amdgpu_ib_free(adev, &disp_ibs[1], NULL); + dma_fence_put(fences[1]); +disp0_failed: + amdgpu_ib_free(adev, &disp_ibs[0], NULL); + dma_fence_put(fences[0]); +pro_end: + amdgpu_ib_free(adev, &wb_ib, NULL); + + if (r) + dev_info(adev->dev, "Init SGPRS Failed\n"); + else + dev_info(adev->dev, "Init SGPRS Successfully\n"); 
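
One detail worth spelling out from the three dispatches above: the expected wave count passed to gfx_v9_4_2_wait_for_waves_assigned() always equals CU count x SIMD_ID_MAX x popcount(wave mask), i.e. each dispatch is expected to place popcount(mask) waves on every SIMD of every active CU (2 for the 112-SGPR shader, 6 for the 96-SGPR shader, 4 for the 64-SGPR shader, 1 for the VGPR shader). A small stand-alone sketch of that arithmetic; the CU count in main() is a made-up example and __builtin_popcount assumes a GCC/Clang-style compiler.

#include <stdint.h>
#include <stdio.h>

#define SIMD_ID_MAX 4

/* Expected pattern hits for one dispatch: popcount(wave_mask) waves on
 * every SIMD of every active CU. */
static unsigned expected_waves(unsigned cu_count, uint32_t wave_mask)
{
	return cu_count * SIMD_ID_MAX * (unsigned)__builtin_popcount(wave_mask);
}

int main(void)
{
	unsigned cu = 96;   /* made-up CU count */

	printf("sgpr112 dispatch expects %u waves\n", expected_waves(cu, 0x03)); /* 0b11       */
	printf("sgpr96  dispatch expects %u waves\n", expected_waves(cu, 0xfc)); /* 0b11111100 */
	printf("sgpr64  dispatch expects %u waves\n", expected_waves(cu, 0x0f)); /* 0b1111     */
	printf("vgpr    dispatch expects %u waves\n", expected_waves(cu, 0x01)); /* 0b1        */
	return 0;
}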
+ + return r; +} + +static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev) +{ + int r; + /* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */ + int wb_size = adev->gfx.config.max_shader_engines * + CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX; + struct amdgpu_ib wb_ib; + struct amdgpu_ib disp_ib; + struct dma_fence *fence; + u32 pattern = 0xa; + + /* bail if the compute ring is not ready */ + if (!adev->gfx.compute_ring[0].sched.ready) + return 0; + + /* allocate the write-back buffer from IB */ + memset(&wb_ib, 0, sizeof(wb_ib)); + r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t), + AMDGPU_IB_POOL_DIRECT, &wb_ib); + if (r) { + dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r); + return r; + } + memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t)); + + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[0], + &disp_ib, + vgpr_init_compute_shader_aldebaran, + sizeof(vgpr_init_compute_shader_aldebaran), + vgpr_init_regs_aldebaran, + ARRAY_SIZE(vgpr_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern, &fence); + if (r) { + dev_err(adev->dev, "failed to clear vgprs\n"); + goto pro_end; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(fence, false); + if (r) { + dev_err(adev->dev, "timeout to clear vgprs\n"); + goto disp_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b1, + pattern, + adev->gfx.cu_info.number * SIMD_ID_MAX, + false); + if (r) { + dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n"); + goto disp_failed; + } + +disp_failed: + amdgpu_ib_free(adev, &disp_ib, NULL); + dma_fence_put(fence); +pro_end: + amdgpu_ib_free(adev, &wb_ib, NULL); + + if (r) + dev_info(adev->dev, "Init VGPRS Failed\n"); + else + dev_info(adev->dev, "Init VGPRS Successfully\n"); + + return r; +} + +int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + gfx_v9_4_2_do_sgprs_init(adev); + + gfx_v9_4_2_do_vgprs_init(adev); + + return 0; +} + static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev); static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev); @@ -148,11 +782,6 @@ void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL1, PWRBRK_STALL_EN, 1); WREG32_SOC15(GC, 0, regGC_THROTTLE_CTRL1, tmp); - WREG32_SOC15(GC, 0, regDIDT_IND_INDEX, ixDIDT_SQ_THROTTLE_CTRL); - tmp = 0; - tmp = REG_SET_FIELD(tmp, DIDT_SQ_THROTTLE_CTRL, PWRBRK_STALL_EN, 1); - WREG32_SOC15(GC, 0, regDIDT_IND_DATA, tmp); - WREG32_SOC15(GC, 0, regGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL); tmp = 0; tmp = REG_SET_FIELD(tmp, PWRBRK_STALL_PATTERN_CTRL, PWRBRK_END_STEP, 0x12); @@ -808,8 +1437,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = { REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) }, }; -static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = - { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; +static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 +}; static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, const struct soc15_reg_entry *reg, @@ -1006,8 +1636,8 @@ static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev, return 0; } -int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) +static int 
gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; uint32_t sec_count = 0, ded_count = 0; @@ -1039,20 +1669,24 @@ static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev) static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev) { uint32_t i, j; + uint32_t value; mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance; j++) { gfx_v9_4_2_select_se_sh(adev, i, 0, j); - WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10); + value = RREG32(SOC15_REG_ENTRY_OFFSET( + gfx_v9_4_2_ea_err_status_regs)); + value = REG_SET_FIELD(value, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1); + WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value); } } gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } -void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev) +static void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev) { if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return; @@ -1061,7 +1695,7 @@ void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev) gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL); } -int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if) +static int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if) { struct ras_inject_if *info = (struct ras_inject_if *)inject_if; int ret; @@ -1096,6 +1730,7 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev) gfx_v9_4_2_select_se_sh(adev, i, 0, j); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( gfx_v9_4_2_ea_err_status_regs)); + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { @@ -1103,7 +1738,9 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev) j, reg_value); } /* clear after read */ - WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10); + reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS, + CLEAR_ERROR_STATUS, 0x1); + WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), reg_value); } } @@ -1134,7 +1771,7 @@ static void gfx_v9_4_2_query_utc_err_status(struct amdgpu_device *adev) } } -void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev) +static void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev) { if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return; @@ -1144,7 +1781,7 @@ void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev) gfx_v9_4_2_query_sq_timeout_status(adev); } -void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev) +static void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev) { if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return; @@ -1154,7 +1791,7 @@ void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev) gfx_v9_4_2_reset_sq_timeout_status(adev); } -void gfx_v9_4_2_enable_watchdog_timer(struct amdgpu_device *adev) +static void gfx_v9_4_2_enable_watchdog_timer(struct amdgpu_device *adev) { uint32_t i; uint32_t data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h index 81c5833b6b9f..6db1f88509af 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h @@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, uint32_t die_id); void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); +int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev); extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 1e4678cb98f0..bda1542ef1dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, block_size); - /* Send no-retry XNACK on fault to suppress VM fault storm. */ + /* Send no-retry XNACK on fault to suppress VM fault storm. + * On Aldebaran, XNACK can be enabled in the SQ per-process. + * Retry faults need to be enabled for that to work. + */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !adev->gmc.noretry); + !adev->gmc.noretry || + adev->asic_type == CHIP_ALDEBARAN); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, @@ -317,18 +321,6 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) { - /* - * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are - * VF copy registers so vbios post doesn't program them, for - * SRIOV driver need to program them - */ - WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE, - adev->gmc.vram_start >> 24); - WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP, - adev->gmc.vram_end >> 24); - } - /* GART Enable. 
*/ gfxhub_v1_0_init_gart_aperture_regs(adev); gfxhub_v1_0_init_system_aperture_regs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 8fca72ebd11c..497b86c376c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -75,9 +75,8 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) max_physical_node_id = 7; break; case CHIP_ALDEBARAN: - /* just using duplicates for Aldebaran support, revisit later */ - max_num_physical_nodes = 8; - max_physical_node_id = 7; + max_num_physical_nodes = 16; + max_physical_node_id = 15; break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 41807817de7d..1a374ec0514a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -31,6 +31,9 @@ #include "soc15_common.h" +#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP 0x16f8 +#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP_BASE_IDX 0 + static const char *gfxhub_client_ids[] = { "CB/DB", "Reserved", @@ -531,6 +534,42 @@ static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) return 0; } +static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) +{ + int i; + u32 tmp = 0, disabled_sa = 0; + u32 efuse_setting, vbios_setting; + + u32 max_sa_mask = amdgpu_gfx_create_bitmask( + adev->gfx.config.max_sh_per_se * + adev->gfx.config.max_shader_engines); + + if (adev->asic_type == CHIP_YELLOW_CARP) { + /* Get SA disabled bitmap from eFuse setting */ + efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE); + efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK; + efuse_setting >>= CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT; + + /* Get SA disabled bitmap from VBIOS setting */ + vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SA_UNIT_DISABLE); + vbios_setting &= GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK; + vbios_setting >>= GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT; + + disabled_sa |= efuse_setting | vbios_setting; + /* Make sure not to report harvested SAs beyond the max SA count */ + disabled_sa &= max_sa_mask; + + for (i = 0; disabled_sa > 0; i++) { + if (disabled_sa & 1) + tmp |= 0x3 << (i * 2); + disabled_sa >>= 1; + } + disabled_sa = tmp; + + WREG32_SOC15(GC, 0, mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP, disabled_sa); + } +} + const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .get_fb_location = gfxhub_v2_1_get_fb_location, .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset, @@ -540,4 +579,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default, .init = gfxhub_v2_1_init, .get_xgmi_info = gfxhub_v2_1_get_xgmi_info, + .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 498b28a35f5b..41c3a0d70b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -93,6 +93,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { bool retry_fault = !!(entry->src_data[1] & 0x80); + bool write_fault = !!(entry->src_data[1] & 0x20); struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; struct amdgpu_task_info task_info; uint32_t status = 0; @@ -121,7 +122,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if 
(amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) return 1; } @@ -229,6 +230,10 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, /* Use register 17 for GART */ const unsigned eng = 17; unsigned int i; + unsigned char hub_ip = 0; + + hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? + GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); /* @@ -242,8 +247,9 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng); + tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, hub_ip); + if (tmp & 0x1) break; udelay(1); @@ -253,7 +259,9 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng, + inv_req, hub_ip); /* * Issue a dummy read to wait for the ACK register to be cleared @@ -261,12 +269,14 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, */ if ((vmhub == AMDGPU_GFXHUB_0) && (adev->asic_type < CHIP_SIENNA_CICHLID)) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); + RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng, hub_ip); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + - hub->eng_distance * eng); + tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng, hub_ip); + tmp &= 1 << vmid; if (tmp) break; @@ -280,15 +290,15 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, 0); + WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0, hub_ip); spin_unlock(&adev->gmc.invalidate_lock); if (i < adev->usec_timeout) return; - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub); } /** @@ -666,6 +676,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: adev->mmhub.funcs = &mmhub_v2_3_funcs; break; default: @@ -681,6 +692,8 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->gfxhub.funcs = &gfxhub_v2_1_funcs; break; default: @@ -796,6 +809,9 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -863,6 +879,9 @@ static int gmc_v10_0_sw_init(void *handle) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -920,6 +939,7 @@ static int gmc_v10_0_sw_init(void *handle) return r; amdgpu_gmc_get_vbios_allocations(adev); + amdgpu_gmc_get_reserved_allocation(adev); 
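
The gfxhub_v2_1_utcl2_harvest() hunk a little further up contains the one non-obvious piece of arithmetic in this area: the combined eFuse/VBIOS SA-disable bitmap is expanded so that every disabled SA contributes two adjacent bits (0x3 << (i * 2)) in the value written to GCUTCL2_HARVEST_BYPASS_GROUPS, presumably because each SA maps to a pair of UTCL2 bypass groups. A stand-alone sketch of that expansion follows; the sample mask in main() is made up.

#include <stdint.h>
#include <stdio.h>

/* Expand an SA-disable bitmap so each disabled SA sets two adjacent bits,
 * mirroring the loop in gfxhub_v2_1_utcl2_harvest() above. */
static uint32_t sa_mask_to_bypass_groups(uint32_t disabled_sa)
{
	uint32_t tmp = 0;
	int i;

	for (i = 0; disabled_sa > 0; i++) {
		if (disabled_sa & 1)
			tmp |= 0x3u << (i * 2);
		disabled_sa >>= 1;
	}
	return tmp;
}

int main(void)
{
	uint32_t sample = 0x5;   /* made-up mask: SAs 0 and 2 disabled */

	printf("bypass groups = 0x%x\n", sa_mask_to_bypass_groups(sample)); /* prints 0x33 */
	return 0;
}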
/* Memory manager */ r = amdgpu_bo_init(adev); @@ -944,7 +964,7 @@ static int gmc_v10_0_sw_init(void *handle) } /** - * gmc_v8_0_gart_fini - vm fini callback + * gmc_v10_0_gart_fini - vm fini callback * * @adev: amdgpu_device pointer * @@ -953,7 +973,6 @@ static int gmc_v10_0_sw_init(void *handle) static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) { amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); } static int gmc_v10_0_sw_fini(void *handle) @@ -978,6 +997,9 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: break; default: break; @@ -1041,6 +1063,13 @@ static int gmc_v10_0_hw_init(void *handle) /* The sequence of these two function calls matters.*/ gmc_v10_0_init_golden_registers(adev); + /* + * harvestable groups in gc_utcl2 need to be programmed before any GFX block + * register setup within GMC, or else system hang when harvesting SA. + */ + if (adev->gfxhub.funcs && adev->gfxhub.funcs->utcl2_harvest) + adev->gfxhub.funcs->utcl2_harvest(adev); + r = gmc_v10_0_gart_enable(adev); if (r) return r; @@ -1133,7 +1162,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle, return r; if (adev->asic_type >= CHIP_SIENNA_CICHLID && - adev->asic_type <= CHIP_DIMGREY_CAVEFISH) + adev->asic_type <= CHIP_YELLOW_CARP) return athub_v2_1_set_clockgating(adev, state); else return athub_v2_0_set_clockgating(adev, state); @@ -1146,7 +1175,7 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) adev->mmhub.funcs->get_clockgating(adev, flags); if (adev->asic_type >= CHIP_SIENNA_CICHLID && - adev->asic_type <= CHIP_DIMGREY_CAVEFISH) + adev->asic_type <= CHIP_YELLOW_CARP) athub_v2_1_get_clockgating(adev, flags); else athub_v2_0_get_clockgating(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 405d6ad09022..0e81e03e9b49 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -898,7 +898,6 @@ static int gmc_v6_0_sw_fini(void *handle) amdgpu_vm_manager_fini(adev); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); - amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 210ada2289ec..0a50fdaced7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -516,7 +516,7 @@ static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev, } /** - * gmc_v8_0_set_fault_enable_default - update VM fault handling + * gmc_v7_0_set_fault_enable_default - update VM fault handling * * @adev: amdgpu_device pointer * @value: true redirects VM faults to the default page @@ -1085,7 +1085,6 @@ static int gmc_v7_0_sw_fini(void *handle) kfree(adev->gmc.vm_fault_info); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); - amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index e4f27b3f28fb..492ebed2915b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1201,7 +1201,6 @@ static int gmc_v8_0_sw_fini(void *handle) kfree(adev->gmc.vm_fault_info); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); - amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 455bb91060d0..d90c16a6b2b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -53,6 +53,9 @@ #include "mmhub_v1_7.h" #include "umc_v6_1.h" #include "umc_v6_0.h" +#include "umc_v6_7.h" +#include "hdp_v4_0.h" +#include "mca_v3_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -504,6 +507,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { bool retry_fault = !!(entry->src_data[1] & 0x80); + bool write_fault = !!(entry->src_data[1] & 0x20); uint32_t status = 0, cid = 0, rw = 0; struct amdgpu_task_info task_info; struct amdgpu_vmhub *hub; @@ -534,7 +538,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) return 1; } @@ -1167,6 +1171,18 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; adev->umc.ras_funcs = &umc_v6_1_ras_funcs; break; + case CHIP_ALDEBARAN: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET; + if (!adev->gmc.xgmi.connected_to_cpu) + adev->umc.ras_funcs = &umc_v6_7_ras_funcs; + if (1 & adev->smuio.funcs->get_die_id(adev)) + adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0]; + else + adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0]; + break; default: break; } @@ -1210,6 +1226,23 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) adev->gfxhub.funcs = &gfxhub_v1_0_funcs; } +static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) +{ + adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs; +} + +static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_ALDEBARAN: + if (!adev->gmc.xgmi.connected_to_cpu) + adev->mca.funcs = &mca_v3_0_funcs; + break; + default: + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1230,6 +1263,8 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_mmhub_funcs(adev); gmc_v9_0_set_mmhub_ras_funcs(adev); gmc_v9_0_set_gfxhub_funcs(adev); + gmc_v9_0_set_hdp_ras_funcs(adev); + gmc_v9_0_set_mca_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -1255,15 +1290,21 @@ static int gmc_v9_0_late_init(void *handle) * writes, while disables HBM ECC for vega10. 
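
The process_interrupt changes above (in both gmc_v10_0 and gmc_v9_0) derive the retry and write nature of a VM fault from bits of the second IV src_data word before deciding whether amdgpu_vm_handle_fault() may resolve it. A stand-alone sketch of that bit decode; the macro names and the payload in main() are made-up illustration values, not definitions from the driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical names for the src_data[1] bits used by the fault handlers. */
#define IV_FAULT_RETRY_BIT 0x80
#define IV_FAULT_WRITE_BIT 0x20

struct fault_bits {
	bool retry;
	bool write;
};

static struct fault_bits decode_fault_bits(const uint32_t src_data[2])
{
	struct fault_bits f = {
		.retry = !!(src_data[1] & IV_FAULT_RETRY_BIT),
		.write = !!(src_data[1] & IV_FAULT_WRITE_BIT),
	};
	return f;
}

int main(void)
{
	uint32_t sample[2] = { 0x0, 0xa0 };   /* made-up IV payload: retry + write */
	struct fault_bits f = decode_fault_bits(sample);

	printf("retry=%d write=%d\n", f.retry, f.write);
	return 0;
}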
*/ if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) { - if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) { + if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) { if (adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } } - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->reset_ras_error_count) - adev->mmhub.ras_funcs->reset_ras_error_count(adev); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + if (adev->mmhub.ras_funcs && + adev->mmhub.ras_funcs->reset_ras_error_count) + adev->mmhub.ras_funcs->reset_ras_error_count(adev); + + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->reset_ras_error_count) + adev->hdp.ras_funcs->reset_ras_error_count(adev); + } r = amdgpu_gmc_ras_late_init(adev); if (r) @@ -1275,10 +1316,7 @@ static int gmc_v9_0_late_init(void *handle) static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) { - u64 base = 0; - - if (!amdgpu_sriov_vf(adev)) - base = adev->mmhub.funcs->get_fb_location(adev); + u64 base = adev->mmhub.funcs->get_fb_location(adev); /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; @@ -1438,6 +1476,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->gfxhub.funcs->init(adev); adev->mmhub.funcs->init(adev); + if (adev->mca.funcs) + adev->mca.funcs->init(adev); spin_lock_init(&adev->gmc.invalidate_lock); @@ -1602,7 +1642,6 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gart_table_vram_free(adev); amdgpu_bo_unref(&adev->gmc.pdb0_bo); amdgpu_bo_fini(adev); - amdgpu_gart_fini(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index edbd35d293eb..74b90cc2bf48 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); } +static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) + return; + + /* HDP SRAM errors are uncorrectable ones (i.e. 
fatal errors) */ + err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); +}; + static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev) { if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) return; - /*read back hdp ras counter to reset it to 0 */ - RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); + + if (adev->asic_type >= CHIP_ALDEBARAN) + WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0); + else + /*read back hdp ras counter to reset it to 0 */ + RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); } static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, @@ -130,10 +149,16 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); } +const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = { + .ras_late_init = amdgpu_hdp_ras_late_init, + .ras_fini = amdgpu_hdp_ras_fini, + .query_ras_error_count = hdp_v4_0_query_ras_error_count, + .reset_ras_error_count = hdp_v4_0_reset_ras_error_count, +}; + const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { .flush_hdp = hdp_v4_0_flush_hdp, .invalidate_hdp = hdp_v4_0_invalidate_hdp, - .reset_ras_error_count = hdp_v4_0_reset_ras_error_count, .update_clock_gating = hdp_v4_0_update_clock_gating, .get_clock_gating_state = hdp_v4_0_get_clockgating_state, .init_registers = hdp_v4_0_init_registers, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h index d1e6399e8c46..dc3a1b81dd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h @@ -27,5 +27,6 @@ #include "soc15_common.h" extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs; +extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 7a15e669b68d..5793977953cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -90,45 +90,56 @@ static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev, RC_MEM_POWER_SD_EN, 0); WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - /* only one clock gating mode (LS/DS/SD) can be enabled */ - if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_LS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_LS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_DS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_SD_EN, enable); - /* RC should not use shut down mode, fallback to ds */ - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } - - /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to - * be set for SRAM LS/DS/SD */ - if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | - AMD_CG_SUPPORT_HDP_SD)) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_CTRL_EN, 1); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_CTRL_EN, 1); + /* Already disabled above. 
The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, 1); + /* RC should not use shut down mode, fallback to ds or ls if allowed */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } + + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to + * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } } - WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - - /* restore IPH & RC clock override after clock/power mode changing */ - WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1); + /* disable IPH & RC clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + IPH_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); } static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index cc957471f31e..ddfe4eaeea05 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -300,8 +300,7 @@ static int iceland_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index bd77794315bc..01c242c5abc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -49,10 +49,13 @@ static int jpeg_v3_0_set_powergating_state(void *handle, static int jpeg_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); - if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) - return -ENOENT; + if (adev->asic_type != CHIP_YELLOW_CARP) { + u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); + + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + return -ENOENT; + } adev->jpeg.num_jpeg_inst = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c new file mode 100644 index 000000000000..058b65730a84 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -0,0 +1,125 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu_ras.h" +#include "amdgpu.h" +#include "amdgpu_mca.h" + +#define smnMCMP0_STATUST0 0x03830408 +#define smnMCMP1_STATUST0 0x03b30408 +#define smnMCMPIO_STATUST0 0x0c930408 + + +static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMP0_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0); +} + +static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mp0); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mp0_ras_funcs = { + .ras_late_init = mca_v3_0_mp0_ras_late_init, + .ras_fini = mca_v3_0_mp0_ras_fini, + .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MP0, + .sysfs_name = "mp0_err_count", +}; + +static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMP1_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1); +} + +static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mp1); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mp1_ras_funcs = { + .ras_late_init = mca_v3_0_mp1_ras_late_init, + .ras_fini = mca_v3_0_mp1_ras_fini, + .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MP1, + .sysfs_name = "mp1_err_count", +}; + +static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMPIO_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mpio); +} + +static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mpio); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mpio_ras_funcs = { 
+ .ras_late_init = mca_v3_0_mpio_ras_late_init, + .ras_fini = mca_v3_0_mpio_ras_fini, + .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MPIO, + .sysfs_name = "mpio_err_count", +}; + + +static void mca_v3_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_mca *mca = &adev->mca; + + mca->mp0.ras_funcs = &mca_v3_0_mp0_ras_funcs; + mca->mp1.ras_funcs = &mca_v3_0_mp1_ras_funcs; + mca->mpio.ras_funcs = &mca_v3_0_mpio_ras_funcs; +} + +const struct amdgpu_mca_funcs mca_v3_0_funcs = { + .init = mca_v3_0_init, +};
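
Note on the new MCA block above: mca_v3_0_init() only wires the per-instance ras_funcs (mp0, mp1, mpio) into adev->mca, and the gmc_v9_0.c hunk earlier in this series calls adev->mca.funcs->init(adev) from sw_init once gmc_v9_0_set_mca_funcs() has selected mca_v3_0_funcs for Aldebaran. The sketch below is illustrative only and is not part of the patch: it shows one plausible way common code could drive the three ras_late_init hooks this file registers. The helper name example_mca_ras_late_init_all() is invented for the example; the init() and ras_late_init() callbacks it uses are the ones defined in the diff.

/* Illustrative sketch, not part of the patch: exercise the MCA RAS
 * instances registered by mca_v3_0_init().  The helper name is
 * hypothetical; only the adev->mca fields and the ras_funcs callbacks
 * come from the changes above.
 */
#include "amdgpu.h"
#include "amdgpu_mca.h"

static int example_mca_ras_late_init_all(struct amdgpu_device *adev)
{
	int r;

	if (!adev->mca.funcs)
		return 0;

	/* gmc_v9_0_sw_init() performs this step when adev->mca.funcs is set */
	adev->mca.funcs->init(adev);

	/* each instance's ras_late_init ends up in amdgpu_mca_ras_late_init() */
	r = adev->mca.mp0.ras_funcs->ras_late_init(adev);
	if (r)
		return r;
	r = adev->mca.mp1.ras_funcs->ras_late_init(adev);
	if (r)
		return r;
	return adev->mca.mpio.ras_funcs->ras_late_init(adev);
}
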
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h new file mode 100644 index 000000000000..b899b86194c2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __MCA_V3_0_H__ +#define __MCA_V3_0_H__ + +extern const struct amdgpu_mca_funcs mca_v3_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 0103a5ab28e6..f80a14a1b82d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -111,6 +111,9 @@ static void mmhub_v1_7_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + if (amdgpu_sriov_vf(adev)) + return; + /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); @@ -129,8 +132,6 @@ static void mmhub_v1_7_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); } - if (amdgpu_sriov_vf(adev)) - return; /* Set default page address. */ value = amdgpu_gmc_vram_mc2pa(adev, adev->vram_scratch.gpu_addr); @@ -296,10 +297,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, block_size); - /* Send no-retry XNACK on fault to suppress VM fault storm. */ + /* On Aldebaran, XNACK can be enabled in the SQ per-process. + * Retry faults need to be enabled for that to work. 
+ */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !adev->gmc.noretry); + 1); WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, @@ -330,18 +333,6 @@ static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev) static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev)) { - /* - * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are - * VF copy registers so vbios post doesn't program them, for - * SRIOV driver need to program them - */ - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, - adev->gmc.vram_start >> 24); - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, - adev->gmc.vram_end >> 24); - } - /* GART Enable. */ mmhub_v1_7_init_gart_aperture_regs(adev); mmhub_v1_7_init_system_aperture_regs(adev); @@ -1313,12 +1304,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev) } } +static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev) +{ + int i; + uint32_t reg_value; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) + return; + + for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) { + reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( + mmhub_v1_7_ea_err_status_regs[i])); + reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, + CLEAR_ERROR_STATUS, 0x01); + WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]), + reg_value); + } +} + const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = { .ras_late_init = amdgpu_mmhub_ras_late_init, .ras_fini = amdgpu_mmhub_ras_fini, .query_ras_error_count = mmhub_v1_7_query_ras_error_count, .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count, .query_ras_error_status = mmhub_v1_7_query_ras_error_status, + .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status, }; const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index ac76081b91d5..7ded6b2f058e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -29,6 +29,7 @@ #include "mmhub/mmhub_2_0_0_default.h" #include "navi10_enum.h" +#include "gc/gc_10_1_0_offset.h" #include "soc15_common.h" #define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d @@ -93,6 +94,30 @@ static const char *mmhub_client_ids_sienna_cichlid[][2] = { [15][1] = "OSS", }; +static const char *mmhub_client_ids_beige_goby[][2] = { + [3][0] = "DCEDMC", + [4][0] = "DCEVGA", + [5][0] = "MP0", + [6][0] = "MP1", + [8][0] = "VMC", + [9][0] = "VCNU0", + [11][0] = "VCN0", + [14][0] = "HDP", + [15][0] = "OSS", + [0][1] = "DBGU0", + [1][1] = "DBGU1", + [2][1] = "DCEDWB", + [3][1] = "DCEDMC", + [4][1] = "DCEVGA", + [5][1] = "MP0", + [6][1] = "MP1", + [7][1] = "XDP", + [9][1] = "VCNU0", + [11][1] = "VCN0", + [14][1] = "HDP", + [15][1] = "OSS", +}; + static uint32_t mmhub_v2_0_get_invalidate_req(unsigned int vmid, uint32_t flush_type) { @@ -139,6 +164,9 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, case CHIP_DIMGREY_CAVEFISH: mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw]; break; + case CHIP_BEIGE_GOBY: + mmhub_cid = mmhub_client_ids_beige_goby[cid][rw]; + break; default: mmhub_cid = NULL; break; @@ -165,11 +193,11 @@ static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; - WREG32_SOC15_OFFSET(MMHUB, 0, 
mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); } @@ -180,14 +208,14 @@ static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base); - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, (u32)(adev->gmc.gart_start >> 44)); - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, (u32)(adev->gmc.gart_end >> 12)); - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, (u32)(adev->gmc.gart_end >> 44)); } @@ -196,12 +224,12 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; uint32_t tmp; - /* Program the AGP BAR */ - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - if (!amdgpu_sriov_vf(adev)) { + /* Program the AGP BAR */ + WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + /* Program the system aperture low logical page number. 
*/ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); @@ -308,7 +336,7 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); - WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); + WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); } static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) @@ -370,16 +398,16 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !adev->gmc.noretry); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i * hub->ctx_addr_distance, lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i * hub->ctx_addr_distance, upper_32_bits(adev->vm_manager.max_pfn - 1)); } @@ -391,9 +419,9 @@ static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) unsigned i; for (i = 0; i < 18; ++i) { - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, i * hub->eng_addr_distance, 0xffffffff); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, i * hub->eng_addr_distance, 0x1f); } } @@ -422,7 +450,7 @@ static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) /* Disable all tables */ for (i = 0; i < AMDGPU_NUM_VMID; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, + WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); /* Setup TLB control */ @@ -540,10 +568,14 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad { uint32_t def, data, def1, data1; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + return; + switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; @@ -553,7 +585,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad break; } - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + if (enable) { data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | @@ -578,6 +610,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: if (def != data) WREG32_SOC15(MMHUB, 0, 
mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); if (def1 != data1) @@ -597,10 +630,14 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + return; + switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); break; default: @@ -608,7 +645,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade break; } - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + if (enable) data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; else data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; @@ -618,6 +655,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); break; default: @@ -640,6 +678,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: mmhub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, @@ -663,6 +702,7 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index a9899335d0b1..88e457a150e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -92,6 +92,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, status); switch (adev->asic_type) { case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; break; default: @@ -569,9 +570,9 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, return 0; mmhub_v2_3_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v2_3_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 47c8dd9d1c78..c4ef822bbe8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -436,7 +436,7 @@ static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) } /** - * mmhub_v1_0_set_fault_enable_default - update GART/VM fault handling + * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling * * @adev: amdgpu_device pointer * @value: true redirects VM faults to the default page diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h index 20958639b601..2cdab8062c86 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h @@ -24,9 +24,7 @@ #ifndef __MMSCH_V1_0_H__ #define __MMSCH_V1_0_H__ -#define MMSCH_VERSION_MAJOR 1 -#define MMSCH_VERSION_MINOR 0 -#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) +#define MMSCH_VERSION 0x1 enum mmsch_v1_0_command_type { MMSCH_COMMAND__DIRECT_REG_WRITE = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 3ee481557fc9..23b066bcffb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -252,12 +252,14 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_read_trylock(&adev->reset_sem)) + if (!down_write_trylock(&adev->reset_sem)) return; amdgpu_virt_fini_data_exchange(adev); atomic_set(&adev->in_gpu_reset, 1); + xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); + do { if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) goto flr_done; @@ -268,7 +270,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) flr_done: atomic_set(&adev->in_gpu_reset, 0); - up_read(&adev->reset_sem); + up_write(&adev->reset_sem); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 50572635d0f8..bd3b23171579 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -37,6 +37,7 @@ enum idh_request { IDH_REQ_GPU_RESET_ACCESS, IDH_LOG_VF_ERROR = 200, + IDH_READY_TO_RESET = 201, }; enum idh_event { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 48e588d3c409..a35e6d87e537 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -96,7 +96,11 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev) static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) { - int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT; + int r; + uint64_t timeout, now; + + now = (uint64_t)ktime_to_ms(ktime_get()); + timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT; do { r = xgpu_nv_mailbox_rcv_msg(adev, event); @@ -104,8 +108,8 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) return 0; msleep(10); - timeout -= 10; - } while (timeout > 1); + now = (uint64_t)ktime_to_ms(ktime_get()); + } while (timeout > now); return -ETIME; @@ -149,9 +153,10 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, enum idh_request req) { - int r; + int r, retry = 1; enum idh_event event = -1; 
+send_request: xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); switch (req) { @@ -170,6 +175,9 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, if (event != -1) { r = xgpu_nv_poll_msg(adev, event); if (r) { + if (retry++ < 2) + goto send_request; + if (req != IDH_REQ_GPU_INIT_DATA) { pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); return r; @@ -273,12 +281,14 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (!down_read_trylock(&adev->reset_sem)) + if (!down_write_trylock(&adev->reset_sem)) return; amdgpu_virt_fini_data_exchange(adev); atomic_set(&adev->in_gpu_reset, 1); + xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); + do { if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) goto flr_done; @@ -289,7 +299,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) flr_done: atomic_set(&adev->in_gpu_reset, 0); - up_read(&adev->reset_sem); + up_write(&adev->reset_sem); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 9f5808616174..73887b0aa1d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -37,7 +37,8 @@ enum idh_request { IDH_REQ_GPU_RESET_ACCESS, IDH_REQ_GPU_INIT_DATA, - IDH_LOG_VF_ERROR = 200, + IDH_LOG_VF_ERROR = 200, + IDH_READY_TO_RESET = 201, }; enum idh_event { diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index f4e4040bbd25..530011622801 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -120,11 +120,23 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev, ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_USED_INT_THRESHOLD, threshold); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) + return; + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_USED_INT_THRESHOLD, threshold); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) + return; + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CNTL2, ih_cntl); } @@ -151,7 +163,13 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 
1 : 0)); - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (enable) { ih->enabled = true; @@ -261,7 +279,15 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); } - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (ih == &adev->irq.ih) { /* set the ih ring 0 writeback address whether it's enabled or not */ @@ -311,6 +337,8 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid); ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); @@ -569,11 +597,7 @@ static int navi10_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); - amdgpu_ih_ring_fini(adev, &adev->irq.ih2); - amdgpu_ih_ring_fini(adev, &adev->irq.ih1); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 05ddec7ba7e2..b184b656b9b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -51,6 +51,8 @@ #define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8 #define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -218,8 +220,11 @@ static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + def = data = RREG32_PCIE(smnCPM_CONTROL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + if (enable) { data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | @@ -244,8 +249,11 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + def = data = RREG32_PCIE(smnPCIE_CNTL2); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + if (enable) { data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); @@ -463,6 +471,63 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) WREG32_PCIE(smnPCIE_LC_CNTL3, data); } +static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + uint32_t link_width = 0; + + if (!((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12))) + return; + + reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL); + link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + + /* + * Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols 
per clock data) + * if link_width is 0x3 (x4) + */ + if (0x3 == link_width) { + reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6); + reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK; + reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT); + WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data); + } +} + +static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + + if (adev->asic_type != CHIP_NAVI10) + return; + + reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL); + reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK; + WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data); +} + +static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev) +{ + uint32_t reg, reg_data; + + if (adev->asic_type != CHIP_SIENNA_CICHLID) + return; + + reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL); + + /* Clear Interrupt Status + */ + if ((reg & BIF_RB_CNTL__RB_ENABLE_MASK) == 0) { + reg = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (reg & BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_STATUS_MASK) { + reg_data = 1 << BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_CLEAR__SHIFT; + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, reg_data); + } + } +} + const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -484,4 +549,7 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, .enable_aspm = nbio_v2_3_enable_aspm, .program_aspm = nbio_v2_3_program_aspm, + .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa, + .apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa, + .clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 598ce0e93627..8f2a315e7c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -28,6 +28,25 @@ #include "nbio/nbio_7_2_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC 0x0015 +#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC_BASE_IDX 2 +#define regBIF_BX0_BIF_FB_EN_YC 0x0100 +#define regBIF_BX0_BIF_FB_EN_YC_BASE_IDX 2 +#define regBIF1_PCIE_MST_CTRL_3 0x4601c6 +#define regBIF1_PCIE_MST_CTRL_3_BASE_IDX 5 +#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT \ + 0x1b +#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT \ + 0x1c +#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK \ + 0x08000000L +#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK \ + 0x30000000L +#define regBIF1_PCIE_TX_POWER_CTRL_1 0x460187 +#define regBIF1_PCIE_TX_POWER_CTRL_1_BASE_IDX 5 +#define BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L +#define BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L + static void nbio_v7_2_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -38,7 +57,12 @@ static void nbio_v7_2_remap_hdp_registers(struct amdgpu_device *adev) static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + u32 tmp; + + if (adev->asic_type == CHIP_YELLOW_CARP) + tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC); + else + tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); tmp &= 
RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -49,11 +73,19 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, - BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | - BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); + if (adev->asic_type == CHIP_YELLOW_CARP) + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, + BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, + BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); else - WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); + if (adev->asic_type == CHIP_YELLOW_CARP) + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, 0); + else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); } static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev) @@ -92,13 +124,13 @@ static void nbio_v7_2_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, - GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET, - doorbell_index); + GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET, + doorbell_index); doorbell_range = REG_SET_FIELD(doorbell_range, - GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8); + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8); } else { doorbell_range = REG_SET_FIELD(doorbell_range, - GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0); + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0); } WREG32_PCIE_PORT(reg, doorbell_range); @@ -123,22 +155,22 @@ static void nbio_v7_2_enable_doorbell_selfring_aperture(struct amdgpu_device *ad if (enable) { tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, - DOORBELL_SELFRING_GPA_APER_EN, 1) | - REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, - DOORBELL_SELFRING_GPA_APER_MODE, 1) | - REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, - DOORBELL_SELFRING_GPA_APER_SIZE, 0); + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); WREG32_SOC15(NBIO, 0, - regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, - lower_32_bits(adev->doorbell.base)); + regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); WREG32_SOC15(NBIO, 0, - regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, - upper_32_bits(adev->doorbell.base)); + regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); } WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, - tmp); + tmp); } @@ -218,19 +250,42 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; - def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { - data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | - PCIE_CNTL2__MST_MEM_LS_EN_MASK | - PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + if (adev->asic_type == CHIP_YELLOW_CARP) { + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data); + + data = 
RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1)); + def = data; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + data |= (BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + else + data &= ~(BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1), + data); } else { - data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | - PCIE_CNTL2__MST_MEM_LS_EN_MASK | - PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + else + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data); } - - if (def != data) - WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data); } static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev, @@ -297,14 +352,25 @@ const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = { static void nbio_v7_2_init_registers(struct amdgpu_device *adev) { uint32_t def, data; - - def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL)); - data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); - data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); - - if (def != data) - WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), - data); + if (adev->asic_type == CHIP_YELLOW_CARP) { + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3)); + data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data); + } else { + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL)); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, + CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, + CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data); + } } const struct amdgpu_nbio_funcs nbio_v7_2_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index cef929746739..f50045cebd44 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -85,6 +85,14 @@ #define mmRCC_DEV0_EPF0_STRAP0_ALDE 0x0015 #define mmRCC_DEV0_EPF0_STRAP0_ALDE_BASE_IDX 2 +#define mmBIF_DOORBELL_INT_CNTL_ALDE 0x00fe +#define mmBIF_DOORBELL_INT_CNTL_ALDE_BASE_IDX 2 +#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE__SHIFT 0x18 +#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE_MASK 0x01000000L + +#define mmBIF_INTR_CNTL_ALDE 0x0101 +#define mmBIF_INTR_CNTL_ALDE_BASE_IDX 2 + static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -346,14 +354,21 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device struct ras_err_data err_data = {0, 0, 0, NULL}; struct amdgpu_ras *ras = 
amdgpu_ras_get_context(adev); - bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE); + else + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { /* driver has to clear the interrupt status when bif ring is disabled */ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_CLEAR, 1); - WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); if (!ras->disable_ras_err_cnt_harvest) { /* @@ -372,13 +387,13 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device "errors detected in %s block, " "no user action is needed.\n", obj->err_data.ce_count, - adev->nbio.ras_if->name); + ras_block_str(adev->nbio.ras_if->block)); if (err_data.ue_count) dev_info(adev->dev, "%ld uncorrectable hardware " "errors detected in %s block\n", obj->err_data.ue_count, - adev->nbio.ras_if->name); + ras_block_str(adev->nbio.ras_if->block)); } dev_info(adev->dev, "RAS controller interrupt triggered " @@ -395,14 +410,22 @@ static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_d { uint32_t bif_doorbell_intr_cntl; - bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE); + else + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { /* driver has to clear the interrupt status when bif ring is disabled */ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); - WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); amdgpu_ras_global_ras_isr(adev); } @@ -420,14 +443,23 @@ static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev, */ uint32_t bif_intr_cntl; - bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE); + else + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { /* set interrupt vector select bit to 0 to select * vetcor 1 for bare metal case */ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, BIF_INTR_CNTL, RAS_INTR_VEC_SEL, 0); - WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } return 0; @@ -456,14 +488,22 @@ static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *ade */ uint32_t bif_intr_cntl; - bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_intr_cntl = RREG32_SOC15(NBIO, 0, 
mmBIF_INTR_CNTL_ALDE); + else + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { /* set interrupt vector select bit to 0 to select * vetcor 1 for bare metal case */ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, BIF_INTR_CNTL, RAS_INTR_VEC_SEL, 0); - WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); } return 0; @@ -572,7 +612,11 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, bool enable) { - WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL_ALDE, + DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); + else + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d290ca0b06da..ff80786e3918 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -58,7 +58,7 @@ #include "jpeg_v2_0.h" #include "vcn_v3_0.h" #include "jpeg_v3_0.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "mes_v10_1.h" #include "mxgpu_nv.h" #include "smuio_v11_0.h" @@ -69,20 +69,8 @@ static const struct amd_ip_funcs nv_common_ip_funcs; /* Navi */ static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_encode = @@ -94,55 +82,13 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_decode = @@ -154,62 +100,14 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = /* Sienna Cichlid */ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs sc_video_codecs_decode = @@ -218,11 +116,84 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode = .codec_array = sc_video_codecs_decode_array, }; +/* SRIOV Sienna Cichlid, not const since data is controlled by host */ +static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = +{ + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, +}; + +static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] = +{ + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 
4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = +{ + .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_encode_array), + .codec_array = sriov_sc_video_codecs_encode_array, +}; + +static struct amdgpu_video_codecs sriov_sc_video_codecs_decode = +{ + .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array), + .codec_array = sriov_sc_video_codecs_decode_array, +}; + +/* Beige Goby*/ +static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs bg_video_codecs_decode = { + .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), + .codec_array = bg_video_codecs_decode_array, +}; + +static const struct amdgpu_video_codecs bg_video_codecs_encode = { + .codec_count = 0, + .codec_array = NULL, +}; + +/* Yellow Carp*/ +static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, +}; + +static const struct amdgpu_video_codecs yc_video_codecs_decode = { + .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array), + .codec_array = yc_video_codecs_decode_array, +}; + static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + if (amdgpu_sriov_vf(adev)) { + if (encode) + *codecs = &sriov_sc_video_codecs_encode; + else + *codecs = &sriov_sc_video_codecs_decode; + } else { + if (encode) + *codecs = &nv_video_codecs_encode; + else + *codecs = &sc_video_codecs_decode; + } + return 0; case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: case CHIP_VANGOGH: @@ -231,6 +202,18 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &sc_video_codecs_decode; return 0; + case CHIP_YELLOW_CARP: + if (encode) + *codecs = &nv_video_codecs_encode; + else + *codecs = &yc_video_codecs_decode; + return 0; + case CHIP_BEIGE_GOBY: + if (encode) + *codecs = &bg_video_codecs_encode; + else + *codecs = &bg_video_codecs_decode; + return 0; case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: @@ -363,7 +346,7 @@ void nv_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); + WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } static void nv_vga_set_state(struct amdgpu_device *adev, bool state) @@ -530,10 +513,12 @@ nv_asic_reset_method(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: return AMD_RESET_METHOD_MODE2; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case 
CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: return AMD_RESET_METHOD_MODE1; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -598,7 +583,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev) static void nv_program_aspm(struct amdgpu_device *adev) { - if (amdgpu_aspm != 1) + if (!amdgpu_aspm) return; if (!(adev->flags & AMD_IS_APU) && @@ -675,6 +660,15 @@ legacy_init: case CHIP_DIMGREY_CAVEFISH: dimgrey_cavefish_reg_base_init(adev); break; + case CHIP_BEIGE_GOBY: + beige_goby_reg_base_init(adev); + break; + case CHIP_YELLOW_CARP: + yellow_carp_reg_base_init(adev); + break; + case CHIP_CYAN_SKILLFISH: + cyan_skillfish_reg_base_init(adev); + break; default: return -EINVAL; } @@ -691,7 +685,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; - if (adev->flags & AMD_IS_APU) { + if (adev->asic_type == CHIP_CYAN_SKILLFISH) { + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + } else if (adev->flags & AMD_IS_APU) { adev->nbio.funcs = &nbio_v7_2_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; } else { @@ -724,7 +721,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -742,12 +739,17 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) case CHIP_NAVI12: amdgpu_device_ip_block_add(adev, &nv_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (!amdgpu_sriov_vf(adev)) { + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -764,14 +766,20 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: amdgpu_device_ip_block_add(adev, &nv_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (!amdgpu_sriov_vf(adev)) { + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + } else { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, 
&dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -794,7 +802,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -815,7 +823,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -835,7 +843,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -845,6 +853,62 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); break; + case CHIP_BEIGE_GOBY: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + break; + case CHIP_YELLOW_CARP: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); +#if 
defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + break; + case CHIP_CYAN_SKILLFISH: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + } + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + break; default: return -EINVAL; } @@ -1068,6 +1132,7 @@ static int nv_common_early_init(void *handle) case CHIP_SIENNA_CICHLID: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_VCN_MGCG | @@ -1091,6 +1156,7 @@ static int nv_common_early_init(void *handle) case CHIP_NAVY_FLOUNDER: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | @@ -1108,7 +1174,6 @@ static int nv_common_early_init(void *handle) break; case CHIP_VANGOGH: - adev->apu_flags |= AMD_APU_IS_VANGOGH; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CP_LS | @@ -1121,6 +1186,8 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_VCN | @@ -1132,6 +1199,7 @@ static int nv_common_early_init(void *handle) case CHIP_DIMGREY_CAVEFISH: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | @@ -1147,6 +1215,57 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_MMHUB; adev->external_rev_id = adev->rev_id + 0x3c; break; + case CHIP_BEIGE_GOBY: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_VCN_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_ATHUB | + AMD_PG_SUPPORT_MMHUB; + adev->external_rev_id = adev->rev_id + 0x46; + break; + case CHIP_YELLOW_CARP: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | + 
AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + if (adev->pdev->device == 0x1681) + adev->external_rev_id = adev->rev_id + 0x19; + else + adev->external_rev_id = adev->rev_id + 0x01; + break; + case CHIP_CYAN_SKILLFISH: + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x82; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1169,8 +1288,12 @@ static int nv_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); + amdgpu_virt_update_sriov_video_codec(adev, + sriov_sc_video_codecs_encode_array, ARRAY_SIZE(sriov_sc_video_codecs_encode_array), + sriov_sc_video_codecs_decode_array, ARRAY_SIZE(sriov_sc_video_codecs_decode_array)); + } return 0; } @@ -1194,6 +1317,12 @@ static int nv_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->nbio.funcs->apply_lc_spc_mode_wa) + adev->nbio.funcs->apply_lc_spc_mode_wa(adev); + + if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa) + adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev); + /* enable pcie gen2/3 link */ nv_pcie_gen3_enable(adev); /* enable aspm */ @@ -1266,6 +1395,7 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..1f40ba3b0460 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -36,4 +36,8 @@ int navi12_reg_base_init(struct amdgpu_device *adev); int sienna_cichlid_reg_base_init(struct amdgpu_device *adev); void vangogh_reg_base_init(struct amdgpu_device *adev); int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev); +int beige_goby_reg_base_init(struct amdgpu_device *adev); +int yellow_carp_reg_base_init(struct amdgpu_device *adev); +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 96064c343163..dd0dce254901 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -97,7 +97,6 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ - GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */ GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ @@ -333,11 +332,16 @@ struct psp_gfx_uresp_fwar_db_info uint32_t fwar_db_addr_hi; }; +/* Command-specific response for boot config. */ +struct psp_gfx_uresp_bootcfg { + uint32_t boot_cfg; /* boot config data */ +}; + /* Union of command-specific responses for GPCOM ring. 
*/ -union psp_gfx_uresp -{ - struct psp_gfx_uresp_reserved reserved; - struct psp_gfx_uresp_fwar_db_info fwar_db_info; +union psp_gfx_uresp { + struct psp_gfx_uresp_reserved reserved; + struct psp_gfx_uresp_bootcfg boot_cfg; + struct psp_gfx_uresp_fwar_db_info fwar_db_info; }; /* Structure of GFX Response buffer. diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 4b1cc5e9ee92..5872d68ed13d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -84,29 +84,29 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = - le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = - le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = + adev->psp.hdcp.feature_version = + le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = + le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); - adev->psp.ta_dtm_ucode_version = - le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = - le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); - - adev->psp.ta_securedisplay_ucode_version = - le32_to_cpu(ta_hdr->ta_securedisplay_ucode_version); - adev->psp.ta_securedisplay_ucode_size = - le32_to_cpu(ta_hdr->ta_securedisplay_size_bytes); - adev->psp.ta_securedisplay_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_securedisplay_offset_bytes); + adev->psp.dtm.feature_version = + le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = + le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + adev->psp.securedisplay.feature_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 02bba1f3c42e..29bf9f09944b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -23,6 +23,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include <linux/vmalloc.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_psp.h" @@ -63,6 +64,8 @@ MODULE_FIRMWARE("amdgpu/vangogh_asd.bin"); MODULE_FIRMWARE("amdgpu/vangogh_toc.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sos.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_sos.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -77,6 +80,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 +/* Read USB-PD from LFB */ +#define GFX_CMD_USB_PD_USE_LFB 0x480 + static int psp_v11_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -115,6 +121,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) 
case CHIP_DIMGREY_CAVEFISH: chip_name = "dimgrey_cavefish"; break; + case CHIP_BEIGE_GOBY: + chip_name = "beige_goby"; + break; default: BUG(); } @@ -142,15 +151,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) goto out2; ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + adev->psp.xgmi.feature_version = le32_to_cpu(ta_hdr->xgmi.fw_version); + adev->psp.xgmi.size_bytes = le32_to_cpu(ta_hdr->xgmi.size_bytes); + adev->psp.xgmi.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version); - adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes); - adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr + - le32_to_cpu(ta_hdr->ta_ras_offset_bytes); + adev->psp.ras.feature_version = le32_to_cpu(ta_hdr->ras.fw_version); + adev->psp.ras.size_bytes = le32_to_cpu(ta_hdr->ras.size_bytes); + adev->psp.ras.start_addr = (uint8_t *)adev->psp.xgmi.start_addr + + le32_to_cpu(ta_hdr->ras.offset_bytes); } break; case CHIP_NAVI10: @@ -177,17 +186,17 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) goto out2; ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr + + adev->psp.hdcp.feature_version = le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + adev->psp.dtm.feature_version = le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); } break; case CHIP_SIENNA_CICHLID: @@ -200,6 +209,14 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) if (err) return err; break; + case CHIP_BEIGE_GOBY: + err = psp_init_sos_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_ta_microcode(psp, chip_name); + if (err) + return err; + break; case CHIP_VANGOGH: err = psp_init_asd_microcode(psp, chip_name); if (err) @@ -269,10 +286,8 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy PSP KDB binary to memory */ - memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); + psp_copy_fw(psp, psp->kdb.start_addr, psp->kdb.size_bytes); /* Provide the PSP KDB to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -302,10 +317,8 @@ static int psp_v11_0_bootloader_load_spl(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy PSP SPL binary to memory 
*/ - memcpy(psp->fw_pri_buf, psp->spl_start_addr, psp->spl_bin_size); + psp_copy_fw(psp, psp->spl.start_addr, psp->spl.size_bytes); /* Provide the PSP SPL to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -335,10 +348,8 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy PSP System Driver binary to memory */ - memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -371,10 +382,8 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy Secure OS binary to PSP memory */ - memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -455,6 +464,7 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct amdgpu_device *adev = psp->adev; if (amdgpu_sriov_vf(adev)) { + ring->ring_wptr = 0; ret = psp_v11_0_ring_stop(psp, ring_type); if (ret) { DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n"); @@ -608,7 +618,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) uint32_t p2c_header[4]; uint32_t sz; void *buf; - int ret; + int ret, idx; if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { DRM_DEBUG("Memory training is not supported.\n"); @@ -681,17 +691,24 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) return -ENOMEM; } - memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); - ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); - if (ret) { - DRM_ERROR("Send long training msg failed.\n"); + if (drm_dev_enter(&adev->ddev, &idx)) { + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); + ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); + if (ret) { + DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); + drm_dev_exit(idx); + return ret; + } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->hdp.funcs->flush_hdp(adev, NULL); vfree(buf); - return ret; + drm_dev_exit(idx); + } else { + vfree(buf); + return -ENODEV; } - - memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); - vfree(buf); } if (ops & PSP_MEM_TRAIN_SAVE) { @@ -739,44 +756,26 @@ static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); } -static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, dma_addr_t dma_addr) +static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) { struct amdgpu_device *adev = psp->adev; uint32_t reg_status; int ret, i = 0; - /* Write lower 32-bit address of the PD Controller FW */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, lower_32_bits(dma_addr)); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); - if (ret) - return ret; - - /* Fireup interrupt so PSP can pick up the lower address */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, 0x800000); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); - if (ret) - return ret; - - reg_status = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35); - - if ((reg_status & 0xFFFF) != 0) { - DRM_ERROR("Lower address load failed - 
MP0_SMN_C2PMSG_35.Bits [15:0] = %02x...\n", - reg_status & 0xFFFF); - return -EIO; - } - - /* Write upper 32-bit address of the PD Controller FW */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, upper_32_bits(dma_addr)); + /* + * LFB address which is aligned to 1MB address and has to be + * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P + * register + */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 0x80000000, 0x80000000, false); if (ret) return ret; - /* Fireup interrupt so PSP can pick up the upper address */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, 0x4000000); + /* Fireup interrupt so PSP can pick up the address */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16)); /* FW load takes very long time */ do { @@ -792,7 +791,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, dma_addr_t dma_add done: if ((reg_status & 0xFFFF) != 0) { - DRM_ERROR("Upper address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = x%04x\n", + DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = 0x%04x\n", reg_status & 0xFFFF); return -EIO; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c new file mode 100644 index 000000000000..ff13e1beb49b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -0,0 +1,208 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v11_0_8.h" + +#include "mp/mp_11_0_8_offset.h" + +static int psp_v11_0_8_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static int psp_v11_0_8_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v11_0_8_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v11_0_8_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v11_0_8_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring 
initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v11_0_8_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v11_0_8_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v11_0_8_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + +static const struct psp_funcs psp_v11_0_8_funcs = { + .ring_init = psp_v11_0_8_ring_init, + .ring_create = psp_v11_0_8_ring_create, + .ring_stop = psp_v11_0_8_ring_stop, + .ring_destroy = psp_v11_0_8_ring_destroy, + .ring_get_wptr = psp_v11_0_8_ring_get_wptr, + .ring_set_wptr = psp_v11_0_8_ring_set_wptr, +}; + +void psp_v11_0_8_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v11_0_8_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h index ed422012c8c6..890377a5afe0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2021 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,11 +20,11 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ +#ifndef __PSP_V11_0_8_H__ +#define __PSP_V11_0_8_H__ -#ifndef __DCE_VIRTUAL_H__ -#define __DCE_VIRTUAL_H__ +#include "amdgpu_psp.h" -extern const struct amdgpu_ip_block_version dce_virtual_ip_block; +void psp_v11_0_8_set_psp_funcs(struct psp_context *psp); #endif - diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index c4828bd3264b..cc649406234b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -67,7 +67,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) err = psp_init_asd_microcode(psp, chip_name); if (err) - goto out; + return err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); @@ -80,35 +80,34 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) } else { err = amdgpu_ucode_validate(adev->psp.ta_fw); if (err) - goto out2; + goto out; ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = - le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = - le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = + adev->psp.hdcp.feature_version = + le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = + le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_dtm_ucode_version = - le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = - le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + adev->psp.dtm.feature_version = + le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = + le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); } return 0; -out2: +out: release_firmware(adev->psp.ta_fw); adev->psp.ta_fw = NULL; -out: if (err) { dev_err(adev->dev, "psp v12.0: Failed to load firmware \"%s\"\n", @@ -138,10 +137,8 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy PSP System Driver binary to memory */ - memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -179,10 +176,8 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy Secure OS binary to PSP memory */ - memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index fcdce46445d6..47a500f64db2 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -31,6 +31,15 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); + +/* For large FW files 
the time to complete can be very long */ +#define USBC_PD_POLLING_LIMIT_S 240 + +/* Read USB-PD from LFB */ +#define GFX_CMD_USB_PD_USE_LFB 0x480 static int psp_v13_0_init_microcode(struct psp_context *psp) { @@ -42,17 +51,37 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case CHIP_ALDEBARAN: chip_name = "aldebaran"; break; + case CHIP_YELLOW_CARP: + chip_name = "yellow_carp"; + break; + default: + BUG(); + } + switch (adev->asic_type) { + case CHIP_ALDEBARAN: + err = psp_init_sos_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_ta_microcode(&adev->psp, chip_name); + if (err) + return err; + break; + case CHIP_YELLOW_CARP: + err = psp_init_asd_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_toc_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_ta_microcode(psp, chip_name); + if (err) + return err; + break; default: BUG(); } - err = psp_init_sos_microcode(psp, chip_name); - if (err) - return err; - - err = psp_init_ta_microcode(&adev->psp, chip_name); - - return err; + return 0; } static bool psp_v13_0_is_sos_alive(struct psp_context *psp) @@ -88,7 +117,9 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) return ret; } -static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) +static int psp_v13_0_bootloader_load_component(struct psp_context *psp, + struct psp_bin_desc *bin_desc, + enum psp_bootloader_cmd bl_cmd) { int ret; uint32_t psp_gfxdrv_command_reg = 0; @@ -107,12 +138,12 @@ static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy PSP KDB binary to memory */ - memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); + memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes); /* Provide the PSP KDB to bootloader */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE; + psp_gfxdrv_command_reg = bl_cmd; WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -121,40 +152,29 @@ static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) return ret; } -static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp) +static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) { - int ret; - uint32_t psp_gfxdrv_command_reg = 0; - struct amdgpu_device *adev = psp->adev; - - /* Check sOS sign of life register to confirm sys driver and sOS - * are already been loaded. 
- */ - if (psp_v13_0_is_sos_alive(psp)) - return 0; - - ret = psp_v13_0_wait_for_bootloader(psp); - if (ret) - return ret; - - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - - /* Copy PSP System Driver binary to memory */ - memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE); +} - /* Provide the sys driver to bootloader */ - WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, - (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV; - WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, - psp_gfxdrv_command_reg); +static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV); +} - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); +static int psp_v13_0_bootloader_load_soc_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV); +} - ret = psp_v13_0_wait_for_bootloader(psp); +static int psp_v13_0_bootloader_load_intf_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV); +} - return ret; +static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); } static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) @@ -176,7 +196,7 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy Secure OS binary to PSP memory */ - memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, @@ -359,10 +379,71 @@ static void psp_v13_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value); } +static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t reg_status; + int ret, i = 0; + + /* + * LFB address which is aligned to 1MB address and has to be + * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P + * register + */ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + /* Fireup interrupt so PSP can pick up the address */ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16)); + + /* FW load takes very long time */ + do { + msleep(1000); + reg_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35); + + if (reg_status & 0x80000000) + goto done; + + } while (++i < USBC_PD_POLLING_LIMIT_S); + + return -ETIME; +done: + + if ((reg_status & 0xFFFF) != 0) { + DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n", + reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (!ret) + *fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36); + + return ret; +} + static const 
struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .bootloader_load_kdb = psp_v13_0_bootloader_load_kdb, .bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv, + .bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv, + .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, + .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_init = psp_v13_0_ring_init, .ring_create = psp_v13_0_ring_create, @@ -370,6 +451,8 @@ static const struct psp_funcs psp_v13_0_funcs = { .ring_destroy = psp_v13_0_ring_destroy, .ring_get_wptr = psp_v13_0_ring_get_wptr, .ring_set_wptr = psp_v13_0_ring_set_wptr, + .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw, + .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 908664a5774b..1ed357cb0f49 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -102,10 +102,8 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy PSP System Driver binary to memory */ - memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -143,10 +141,8 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) if (ret) return ret; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - /* Copy Secure OS binary to PSP memory */ - memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -231,6 +227,7 @@ static int psp_v3_1_ring_create(struct psp_context *psp, psp_v3_1_reroute_ih(psp); if (amdgpu_sriov_vf(adev)) { + ring->ring_wptr = 0; ret = psp_v3_1_ring_stop(psp, ring_type); if (ret) { DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 9f0dda040ec8..4509bd4cce2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -271,7 +271,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, } /** - * sdma_v2_4_hdp_flush_ring_emit - emit an hdp flush on the DMA ring + * sdma_v2_4_ring_emit_hdp_flush - emit an hdp flush on the DMA ring * * @ring: amdgpu ring pointer * diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 5715be6770ec..8931000dcd41 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -144,7 +144,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) }; @@ -288,7 +288,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { 
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) }; @@ -754,7 +754,7 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) } /** - * sdma_v4_0_page_ring_set_wptr - commit the write pointer + * sdma_v4_0_ring_set_wptr - commit the write pointer * * @ring: amdgpu ring pointer * @@ -820,7 +820,7 @@ static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring) } /** - * sdma_v4_0_ring_set_wptr - commit the write pointer + * sdma_v4_0_page_ring_set_wptr - commit the write pointer * * @ring: amdgpu ring pointer * @@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) if (adev->asic_type == CHIP_ARCTURUS && adev->sdma.instance[i].fw_version >= 14) WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable); + /* Extend page fault timeout to avoid interrupt storm */ + WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080); } } @@ -1894,8 +1896,11 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); - if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + if (adev->sdma.funcs && + adev->sdma.funcs->reset_ras_error_count) + adev->sdma.funcs->reset_ras_error_count(adev); + } if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) return adev->sdma.funcs->ras_late_init(adev, &ih_info); @@ -2227,7 +2232,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_info(adev->dev, + dev_dbg_ratelimited(adev->dev, "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " "pasid:%u, for process %s pid %d thread %s pid %d\n", instance, addr, entry->src_id, entry->ring_id, entry->vmid, @@ -2240,7 +2245,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, "MC or SEM address in VM hole\n"); + dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; } @@ -2249,7 +2254,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n"); + dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; } @@ -2258,7 +2263,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, + dev_dbg_ratelimited(adev->dev, "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; @@ -2268,7 +2273,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, + dev_dbg_ratelimited(adev->dev, "SDMA gets 
an Register Write SRBM_WRITE command in non-privilege command buffer\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; @@ -2597,27 +2602,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = { static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { + adev->sdma.trap_irq.num_types = adev->sdma.num_instances; + adev->sdma.ecc_irq.num_types = adev->sdma.num_instances; + /*For Arcturus and Aldebaran, add another 4 irq handler*/ switch (adev->sdma.num_instances) { - case 1: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; - break; case 5: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - break; case 8: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances; + adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances; + adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances; + adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances; break; - case 2: default: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; break; } adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 8859133ce37e..50bf3b71bc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -51,6 +51,12 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); + #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 #define SDMA0_HYP_DEC_REG_END 0x5893 @@ -130,6 +136,37 @@ static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), }; +static const struct soc15_reg_golden golden_settings_sdma_cyan_skillfish[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x007fffff, 0x004c5c00), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x007fffff, 0x004c5c00) +}; + static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { u32 base; @@ -180,6 +217,11 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_nv12, (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); break; + case CHIP_CYAN_SKILLFISH: + soc15_program_register_sequence(adev, + golden_settings_sdma_cyan_skillfish, + (const u32)ARRAY_SIZE(golden_settings_sdma_cyan_skillfish)); + break; default: break; } @@ -200,7 +242,7 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; + char fw_name[40]; int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -221,6 +263,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + chip_name = "cyan_skillfish2"; + else + chip_name = "cyan_skillfish"; + break; default: BUG(); } @@ -328,9 +376,9 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - wptr = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); + wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); wptr = wptr << 32; - wptr |= RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); + wptr |= RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); } @@ -371,9 +419,9 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), ring->me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, 
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } } @@ -409,18 +457,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); - /* Invalidate L2, because if we don't do it, we might get stale cache - * lines from previous IBs. - */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV | - SDMA_GCR_GL2_WB | - SDMA_GCR_GLM_INV | - SDMA_GCR_GLM_WB) << 16); - amdgpu_ring_write(ring, 0xffffff80); - amdgpu_ring_write(ring, 0xffff); - /* An IB packet must end on a 8 DW boundary--the next dword * must be on a 8-dword boundary. Our IB packet below is 6 * dwords long, thus add x number of NOPs, such that, in @@ -442,6 +478,32 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, } /** + * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse + * + * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from + * @ib: IB object to schedule + * + * flush the IB by graphics cache rinse. + */ +static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring) +{ + uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV | + SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV | + SDMA_GCR_GLI_INV(1); + + /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) | + SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) | + SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) | + SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0)); +} + +/** * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring * * @ring: amdgpu ring pointer @@ -534,12 +596,12 @@ static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); } } @@ -556,7 +618,7 @@ static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev) } /** - * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch + * sdma_v5_0_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer * @enable: enable/disable the DMA MEs context switch. 
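[Editorial aside, not part of the patch] The 8-dword boundary rule quoted in the sdma_v5_0_ring_emit_ib comment above can be sanity-checked in isolation. The sketch below is illustrative only; it assumes the 6-dword IB packet size stated in that comment and uses (2 - wptr) & 7 as one closed-form solution for the NOP count x in (wptr + 6 + x) % 8 == 0 (the exact expression used by the driver is not shown in this hunk, so treat the helper name and form as an assumption).

    #include <stdio.h>
    #include <stdint.h>

    /*
     * NOP dwords needed so that wptr + 6 + x is a multiple of 8, i.e. the
     * packet following the 6-dword IB packet starts on an 8-dword boundary.
     * One solution of (wptr + 6 + x) % 8 == 0 is x = (2 - wptr) & 7.
     */
    static uint32_t ib_pad_dwords(uint32_t wptr)
    {
            return (2 - wptr) & 7;
    }

    int main(void)
    {
            uint32_t wptr;

            for (wptr = 0; wptr < 16; wptr++)
                    printf("wptr %2u -> pad %u dw, next packet at dw %u\n",
                           wptr, ib_pad_dwords(wptr),
                           wptr + 6 + ib_pad_dwords(wptr));
            return 0;
    }

For wptr values 0..7 this yields pads of 2, 1, 0, 7, 6, 5, 4, 3 respectively, each landing the next packet on dword 8 or 16, which is the behaviour the comment describes.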
@@ -600,11 +662,11 @@ static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), phase_quantum); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), phase_quantum); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), phase_quantum); } if (!amdgpu_sriov_vf(adev)) @@ -671,58 +733,63 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); /* setup the wptr shadow polling */ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, + wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); /* set the wb address whether it's enabled or not */ - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); rb_cntl = 
REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), + ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), + ring->gpu_addr >> 40); ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr) << 2); } - doorbell = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, + mmSDMA0_GFX_DOORBELL_OFFSET)); if (ring->use_doorbell) { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); @@ -731,8 +798,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) } else { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); } - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), + doorbell_offset); adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, 20); @@ -741,7 +809,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) sdma_v5_0_ring_set_wptr(ring); /* set minor_ptr_update to 0 after wptr programed */ - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); if (!amdgpu_sriov_vf(adev)) { /* set utc l1 enable flag always to 1 */ @@ -775,15 +843,15 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); #endif /* enable DMA IBs */ - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); ring->sched.ready = 
true; @@ -1647,6 +1715,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, + .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync, .emit_fence = sdma_v5_0_ring_emit_fence, .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync, .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 240596b25fe4..779f5c911e11 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -47,8 +47,10 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin"); +MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin"); MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin"); +MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA3_REG_OFFSET 0x400 @@ -85,19 +87,6 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev) -{ - switch (adev->asic_type) { - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - break; - default: - break; - } -} - static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) { int err = 0; @@ -145,9 +134,6 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_SIENNA_CICHLID)) - return 0; - DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -163,6 +149,12 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: chip_name = "dimgrey_cavefish"; break; + case CHIP_BEIGE_GOBY: + chip_name = "beige_goby"; + break; + case CHIP_YELLOW_CARP: + chip_name = "yellow_carp"; + break; default: BUG(); } @@ -182,6 +174,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) (void *)&adev->sdma.instance[0], sizeof(struct amdgpu_sdma_instance)); + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_SIENNA_CICHLID)) + return 0; + DRM_DEBUG("psp_load == '%s'\n", adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); @@ -490,12 +485,12 @@ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); } } @@ -512,7 +507,7 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev) } /** - * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch + * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer * @enable: enable/disable the DMA MEs context switch. @@ -553,11 +548,11 @@ static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), phase_quantum); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), phase_quantum); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), phase_quantum); } WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); @@ -615,62 +610,62 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) ring = &adev->sdma.instance[i].ring; wb_offset = (ring->rptr_offs * 4); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32_SOC15_IP(GC, 
sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); /* setup the wptr shadow polling */ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, + wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); /* set the wb address whether it's enabled or not */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); } - doorbell = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); if (ring->use_doorbell) { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); @@ -679,8 +674,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) } else { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); } - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + 
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, @@ -690,7 +685,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) sdma_v5_2_ring_set_wptr(ring); /* set minor_ptr_update to 0 after wptr programed */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); /* set utc l1 enable flag always to 1 */ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); @@ -701,19 +696,19 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); /* Set up RESP_MODE to non-copy addresses */ - temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); + temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); /* program default cache read and write policy */ - temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); + temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); /* clean read policy and write policy bits */ temp &= 0xFF0FFF; temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); if (!amdgpu_sriov_vf(adev)) { /* unhalt engine */ @@ -724,15 +719,15 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); #endif /* enable DMA IBs */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); ring->sched.ready = true; @@ -1223,6 +1218,8 @@ static int sdma_v5_2_early_init(void *handle) adev->sdma.num_instances = 2; break; case CHIP_VANGOGH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: adev->sdma.num_instances = 1; break; default: @@ -1333,8 +1330,6 @@ static int sdma_v5_2_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - sdma_v5_2_init_golden_registers(adev); - r = sdma_v5_2_start(adev); return r; @@ -1551,6 +1546,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade int i; for (i = 0; i < adev->sdma.num_instances; i++) { + + if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH) + adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { /* Enable sdma clock gating */ def = data 
= RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); @@ -1584,6 +1583,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev int i; for (i = 0; i < adev->sdma.num_instances; i++) { + + if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH) + adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { /* Enable sdma mem light sleep */ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); @@ -1615,6 +1618,8 @@ static int sdma_v5_2_set_clockgating_state(void *handle, case CHIP_NAVY_FLOUNDER: case CHIP_VANGOGH: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: + case CHIP_YELLOW_CARP: sdma_v5_2_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); sdma_v5_2_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 7cbc2bb03bc6..e6d2f74a7976 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -44,7 +44,7 @@ #include "dce_v6_0.h" #include "si.h" #include "uvd_v3_1.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "gca/gfx_6_0_d.h" #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" @@ -2759,7 +2759,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2777,7 +2777,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2795,7 +2795,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index cb703e307238..195b45bcb8ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -305,7 +305,7 @@ err0: } /** - * cik_dma_vm_copy_pte - update PTEs by copying them from the GART + * si_dma_vm_copy_pte - update PTEs by copying them from the GART * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry @@ -402,7 +402,7 @@ static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, } /** - * si_dma_pad_ib - pad the IB to the required number of dw + * si_dma_ring_pad_ib - pad the IB to the required number of dw * * @ring: amdgpu_ring pointer * @ib: indirect buffer to fill with padding @@ -415,7 +415,7 @@ static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) } /** - * cik_sdma_ring_emit_pipeline_sync - sync the pipeline + * si_dma_ring_emit_pipeline_sync - sync the pipeline * * @ring: amdgpu_ring 
pointer * diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 51880f6ef634..9a24f17a5750 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -175,8 +175,7 @@ static int si_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 5c7d769aee3f..73ffa8fde3df 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -41,9 +41,7 @@ #define I2C_SW_TIMEOUT 8 #define I2C_ABORT 0x10 -/* I2C transaction flags */ -#define I2C_NO_STOP 1 -#define I2C_RESTART 2 +#define I2C_X_RESTART BIT(31) #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) @@ -56,12 +54,48 @@ static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg); } +/* The T_I2C_POLL_US is defined as follows: + * + * "Define a timer interval (t_i2c_poll) equal to 10 times the + * signalling period for the highest I2C transfer speed used in the + * system and supported by DW_apb_i2c. For instance, if the highest + * I2C data transfer mode is 400 kb/s, then t_i2c_poll is 25 us." -- + * DesignWare DW_apb_i2c Databook, Version 1.21a, section 3.8.3.1, + * page 56, with grammar and syntax corrections. + * + * Vcc for our device is at 1.8V which puts it at 400 kHz, + * see Atmel AT24CM02 datasheet, section 8.3 DC Characteristics table, page 14. + * + * The procedure to disable the IP block is described in section + * 3.8.3 Disabling DW_apb_i2c on page 56. + */ +#define I2C_SPEED_MODE_FAST 2 +#define T_I2C_POLL_US 25 +#define I2C_MAX_T_POLL_COUNT 1000 -static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) +static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) { struct amdgpu_device *adev = to_amdgpu_device(control); WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0); + + if (!enable) { + int ii; + + for (ii = I2C_MAX_T_POLL_COUNT; ii > 0; ii--) { + u32 en_stat = RREG32_SOC15(SMUIO, + 0, + mmCKSVII2C_IC_ENABLE_STATUS); + if (REG_GET_FIELD(en_stat, CKSVII2C_IC_ENABLE_STATUS, IC_EN)) + udelay(T_I2C_POLL_US); + else + return I2C_OK; + } + + return I2C_ABORT; + } + + return I2C_OK; } static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) @@ -83,8 +117,13 @@ static void smu_v11_0_i2c_configure(struct i2c_adapter *control) reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1); reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0); reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0); - /* Standard mode */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2); + /* The values of IC_MAX_SPEED_MODE are, + * 1: standard mode, 0 - 100 Kb/s, + * 2: fast mode, <= 400 Kb/s, or fast mode plus, <= 1000 Kb/s, + * 3: high speed mode, <= 3.4 Mb/s. 
+ */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, + I2C_SPEED_MODE_FAST); reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1); WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg); @@ -113,13 +152,15 @@ static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20); } -static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address) +static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address) { struct amdgpu_device *adev = to_amdgpu_device(control); - /* Convert fromr 8-bit to 7-bit address */ - address >>= 1; - WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF)); + /* The IC_TAR::IC_TAR field is 10-bits wide. + * It takes a 7-bit or 10-bit addresses as an address, + * i.e. no read/write bit--no wire format, just the address. + */ + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, address & 0x3FF); } static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) @@ -206,9 +247,6 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) return ret; } - - - /** * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. * @@ -221,17 +259,17 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) * Returns 0 on success or error. */ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, - uint8_t address, uint8_t *data, - uint32_t numbytes, uint32_t i2c_flag) + u16 address, u8 *data, + u32 numbytes, u32 i2c_flag) { struct amdgpu_device *adev = to_amdgpu_device(control); - uint32_t bytes_sent, reg, ret = 0; + u32 bytes_sent, reg, ret = I2C_OK; unsigned long timeout_counter; bytes_sent = 0; DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ", - (uint16_t)address, numbytes); + address, numbytes); if (drm_debug_enabled(DRM_UT_DRIVER)) { print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, @@ -246,53 +284,49 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, /* Clear status bits */ smu_v11_0_i2c_clear_status(control); - timeout_counter = jiffies + msecs_to_jiffies(20); while (numbytes > 0) { reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); - if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { - do { - reg = 0; - /* - * Prepare transaction, no need to set RESTART. I2C engine will send - * START as soon as it sees data in TXFIFO - */ - if (bytes_sent == 0) - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, - (i2c_flag & I2C_RESTART) ? 1 : 0); - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]); - - /* determine if we need to send STOP bit or not */ - if (numbytes == 1) - /* Final transaction, so send stop unless I2C_NO_STOP */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, - (i2c_flag & I2C_NO_STOP) ? 0 : 1); - /* Write */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); - WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); - - /* Record that the bytes were transmitted */ - bytes_sent++; - numbytes--; - - reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); - - } while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)); - } - - /* - * We waited too long for the transmission FIFO to become not-full. - * Exit the loop with error. - */ - if (time_after(jiffies, timeout_counter)) { - ret |= I2C_SW_TIMEOUT; - goto Err; + if (!REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { + /* + * We waited for too long for the transmission + * FIFO to become not-full. Exit the loop + * with error. 
+ */ + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + goto Err; + } + } else { + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, + data[bytes_sent]); + + /* Final message, final byte, must generate a + * STOP to release the bus, i.e. don't hold + * SCL low. + */ + if (numbytes == 1 && i2c_flag & I2C_M_STOP) + reg = REG_SET_FIELD(reg, + CKSVII2C_IC_DATA_CMD, + STOP, 1); + + if (bytes_sent == 0 && i2c_flag & I2C_X_RESTART) + reg = REG_SET_FIELD(reg, + CKSVII2C_IC_DATA_CMD, + RESTART, 1); + + /* Write */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + /* Record that the bytes were transmitted */ + bytes_sent++; + numbytes--; } } ret = smu_v11_0_i2c_poll_tx_status(control); - Err: /* Any error, no point in proceeding */ if (ret != I2C_OK) { @@ -323,8 +357,8 @@ Err: * Returns 0 on success or error. */ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, - uint8_t address, uint8_t *data, - uint32_t numbytes, uint8_t i2c_flag) + u16 address, u8 *data, + u32 numbytes, u32 i2c_flag) { struct amdgpu_device *adev = to_amdgpu_device(control); uint32_t bytes_received, ret = I2C_OK; @@ -342,23 +376,21 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, smu_v11_0_i2c_clear_status(control); - /* Prepare transaction */ - - /* Each time we disable I2C, so this is not a restart */ - if (bytes_received == 0) - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, - (i2c_flag & I2C_RESTART) ? 1 : 0); - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0); /* Read */ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1); - /* Transmitting last byte */ - if (numbytes == 1) - /* Final transaction, so send stop if requested */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, - (i2c_flag & I2C_NO_STOP) ? 0 : 1); + /* Final message, final byte, must generate a STOP + * to release the bus, i.e. don't hold SCL low. 
+ */ + if (numbytes == 1 && i2c_flag & I2C_M_STOP) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, + STOP, 1); + + if (bytes_received == 0 && i2c_flag & I2C_X_RESTART) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, + RESTART, 1); WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); @@ -413,7 +445,6 @@ static void smu_v11_0_i2c_abort(struct i2c_adapter *control) DRM_DEBUG_DRIVER("I2C_Abort() Done."); } - static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); @@ -425,7 +456,6 @@ static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); - if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { /* @@ -455,6 +485,8 @@ static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) static void smu_v11_0_i2c_init(struct i2c_adapter *control) { + int res; + /* Disable clock gating */ smu_v11_0_i2c_set_clock_gating(control, false); @@ -462,7 +494,9 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control) DRM_WARN("I2C busy !"); /* Disable I2C */ - smu_v11_0_i2c_enable(control, false); + res = smu_v11_0_i2c_enable(control, false); + if (res != I2C_OK) + smu_v11_0_i2c_abort(control); /* Configure I2C to operate as master and in standard mode */ smu_v11_0_i2c_configure(control); @@ -475,21 +509,22 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control) static void smu_v11_0_i2c_fini(struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); - uint32_t reg_ic_enable_status, reg_ic_enable; + u32 status, enable, en_stat; + int res; - smu_v11_0_i2c_enable(control, false); + res = smu_v11_0_i2c_enable(control, false); + if (res != I2C_OK) { + status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + en_stat = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); - /* Double check if disabled, else force abort */ - reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); - reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); - - if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && - (REG_GET_FIELD(reg_ic_enable_status, - CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { - /* - * Nobody is using I2C engine, but engine remains active because - * someone missed to send STOP + /* Nobody is using the I2C engine, yet it remains + * active, possibly because someone missed to send + * STOP. 
*/ + DRM_DEBUG_DRIVER("Aborting from fini: status:0x%08x " + "enable:0x%08x enable_stat:0x%08x", + status, enable, en_stat); smu_v11_0_i2c_abort(control); } @@ -531,22 +566,12 @@ static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control) /***************************** I2C GLUE ****************************/ static uint32_t smu_v11_0_i2c_read_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) + struct i2c_msg *msg, uint32_t i2c_flag) { - uint32_t ret = 0; - - /* First 2 bytes are dummy write to set EEPROM address */ - ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP); - if (ret != I2C_OK) - goto Fail; + uint32_t ret; - /* Now read data starting with that address */ - ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2, - I2C_RESTART); + ret = smu_v11_0_i2c_receive(control, msg->addr, msg->buf, msg->len, i2c_flag); -Fail: if (ret != I2C_OK) DRM_ERROR("ReadData() - I2C error occurred :%x", ret); @@ -554,28 +579,15 @@ Fail: } static uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) + struct i2c_msg *msg, uint32_t i2c_flag) { uint32_t ret; - ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0); + ret = smu_v11_0_i2c_transmit(control, msg->addr, msg->buf, msg->len, i2c_flag); if (ret != I2C_OK) DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret); - else - /* - * According to EEPROM spec there is a MAX of 10 ms required for - * EEPROM to flush internal RX buffer after STOP was issued at the - * end of write transaction. During this time the EEPROM will not be - * responsive to any more commands - so wait a bit more. - * - * TODO Improve to wait for first ACK for slave address after - * internal write cycle done. - */ - msleep(10); - + return ret; } @@ -584,12 +596,11 @@ static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) { struct amdgpu_device *adev = to_amdgpu_device(i2c); - if (!smu_v11_0_i2c_bus_lock(i2c)) { + mutex_lock(&adev->pm.smu_i2c_mutex); + if (!smu_v11_0_i2c_bus_lock(i2c)) DRM_ERROR("Failed to lock the bus from SMU"); - return; - } - - adev->pm.bus_locked = true; + else + adev->pm.bus_locked = true; } static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) @@ -602,12 +613,11 @@ static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) { struct amdgpu_device *adev = to_amdgpu_device(i2c); - if (!smu_v11_0_i2c_bus_unlock(i2c)) { + if (!smu_v11_0_i2c_bus_unlock(i2c)) DRM_ERROR("Failed to unlock the bus from SMU"); - return; - } - - adev->pm.bus_locked = false; + else + adev->pm.bus_locked = false; + mutex_unlock(&adev->pm.smu_i2c_mutex); } static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { @@ -617,27 +627,60 @@ static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { }; static int smu_v11_0_i2c_xfer(struct i2c_adapter *i2c_adap, - struct i2c_msg *msgs, int num) + struct i2c_msg *msg, int num) { int i, ret; - struct amdgpu_device *adev = to_amdgpu_device(i2c_adap); - - if (!adev->pm.bus_locked) { - DRM_ERROR("I2C bus unlocked, stopping transaction!"); - return -EIO; - } + u16 addr, dir; smu_v11_0_i2c_init(i2c_adap); + /* From the client's point of view, this sequence of + * messages-- the array i2c_msg *msg, is a single transaction + * on the bus, starting with START and ending with STOP. + * + * The client is welcome to send any sequence of messages in + * this array, as processing under this function here is + * striving to be agnostic. 
+ * + * Record the first address and direction we see. If either + * changes for a subsequent message, generate ReSTART. The + * DW_apb_i2c databook, v1.21a, specifies that ReSTART is + * generated when the direction changes, with the default IP + * block parameter settings, but it doesn't specify if ReSTART + * is generated when the address changes (possibly...). We + * don't rely on the default IP block parameter settings as + * the block is shared and they may change. + */ + if (num > 0) { + addr = msg[0].addr; + dir = msg[0].flags & I2C_M_RD; + } + for (i = 0; i < num; i++) { - if (msgs[i].flags & I2C_M_RD) + u32 i2c_flag = 0; + + if (msg[i].addr != addr || (msg[i].flags ^ dir) & I2C_M_RD) { + addr = msg[i].addr; + dir = msg[i].flags & I2C_M_RD; + i2c_flag |= I2C_X_RESTART; + } + + if (i == num - 1) { + /* Set the STOP bit on the last message, so + * that the IP block generates a STOP after + * the last byte of the message. + */ + i2c_flag |= I2C_M_STOP; + } + + if (msg[i].flags & I2C_M_RD) ret = smu_v11_0_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - msgs[i].buf, msgs[i].len); + msg + i, + i2c_flag); else ret = smu_v11_0_i2c_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - msgs[i].buf, msgs[i].len); + msg + i, + i2c_flag); if (ret != I2C_OK) { num = -EIO; @@ -654,23 +697,28 @@ static u32 smu_v11_0_i2c_func(struct i2c_adapter *adap) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } - static const struct i2c_algorithm smu_v11_0_i2c_algo = { .master_xfer = smu_v11_0_i2c_xfer, .functionality = smu_v11_0_i2c_func, }; +static const struct i2c_adapter_quirks smu_v11_0_i2c_control_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN, +}; + int smu_v11_0_i2c_control_init(struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; + mutex_init(&adev->pm.smu_i2c_mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; + control->class = I2C_CLASS_HWMON; control->dev.parent = &adev->pdev->dev; control->algo = &smu_v11_0_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; + control->quirks = &smu_v11_0_i2c_control_quirks; res = i2c_add_adapter(control); if (res) diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c index e9c474c217ec..b6f1322f908c 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -43,9 +43,12 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool if (adev->flags & AMD_IS_APU) return; + if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + return; + def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + if (enable) data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); else diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 079b094c48ad..39b7c206770f 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -89,7 +89,24 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev) } /** - * smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s. 
+ * smuio_v13_0_get_socket_id - query socket id from FCH + * + * @adev: amdgpu device pointer + * + * Returns socket id + */ +static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev) +{ + u32 data, socket_id; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID); + + return socket_id; +} + +/** + * smuio_v13_0_is_host_gpu_xgmi_supported - detect xgmi interface between cpu and gpu/s. * * @adev: amdgpu device pointer * @@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = { .get_rom_index_offset = smuio_v13_0_get_rom_index_offset, .get_rom_data_offset = smuio_v13_0_get_rom_data_offset, .get_die_id = smuio_v13_0_get_die_id, + .get_socket_id = smuio_v13_0_get_socket_id, .is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported, .update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating, .get_clock_gating_state = smuio_v13_0_get_clock_gating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e65c286f93a6..0fc97c364fd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -74,7 +74,7 @@ #include "smuio_v9_0.h" #include "smuio_v11_0.h" #include "smuio_v13_0.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "mxgpu_ai.h" #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" @@ -88,20 +88,8 @@ /* Vega, Raven, Arcturus */ static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_encode = @@ -113,48 +101,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 
4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_decode = @@ -166,55 +118,13 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs rv_video_codecs_decode = @@ -226,55 +136,13 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs rn_video_codecs_decode = @@ -633,7 +501,9 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, if (entry->and_mask == 0xffffffff) { tmp = entry->or_mask; } else { - tmp = RREG32(reg); + tmp = (entry->hwip == GC_HWIP) ? + RREG32_SOC15_IP(GC, reg) : RREG32(reg); + tmp &= ~(entry->and_mask); tmp |= (entry->or_mask & entry->and_mask); } @@ -644,7 +514,8 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)) WREG32_RLC(reg, tmp); else - WREG32(reg, tmp); + (entry->hwip == GC_HWIP) ? + WREG32_SOC15_IP(GC, reg, tmp) : WREG32(reg, tmp); } @@ -656,7 +527,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ - if (ras && ras->supported) + if (ras && adev->ras_enabled) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); ret = amdgpu_dpm_baco_reset(adev); @@ -664,7 +535,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return ret; /* re-enable doorbell interrupt after BACO exit */ - if (ras && ras->supported) + if (ras && adev->ras_enabled) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; @@ -704,14 +575,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev) baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: - if (adev->psp.sos_fw_version >= 0x80067) + if (adev->psp.sos.fw_version >= 0x80067) baco_reset = amdgpu_dpm_is_baco_supported(adev); /* * 1. PMFW version > 0x284300: all cases use baco * 2. 
PMFW version <= 0x284300: only sGPU w/o RAS use baco */ - if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400) + if (ras && adev->ras_enabled && + adev->pm.fw_version <= 0x283400) baco_reset = false; break; case CHIP_ALDEBARAN: @@ -763,7 +635,7 @@ static bool soc15_supports_baco(struct amdgpu_device *adev) case CHIP_ARCTURUS: return amdgpu_dpm_is_baco_supported(adev); case CHIP_VEGA20: - if (adev->psp.sos_fw_version >= 0x80067) + if (adev->psp.sos.fw_version >= 0x80067) return amdgpu_dpm_is_baco_supported(adev); return false; default: @@ -817,7 +689,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev) static void soc15_program_aspm(struct amdgpu_device *adev) { - if (amdgpu_aspm != 1) + if (!amdgpu_aspm) return; if (!(adev->flags & AMD_IS_APU) && @@ -971,7 +843,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -991,7 +863,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -1013,7 +885,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); @@ -1037,7 +909,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -1356,10 +1228,7 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RAVEN: adev->asic_funcs = &soc15_asic_funcs; - if (adev->pdev->device == 0x15dd) - adev->apu_flags |= AMD_APU_IS_RAVEN; - if (adev->pdev->device == 0x15d8) - adev->apu_flags |= AMD_APU_IS_PICASSO; + if (adev->rev_id >= 0x8) adev->apu_flags |= AMD_APU_IS_RAVEN2; @@ -1451,11 +1320,6 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RENOIR: adev->asic_funcs = &soc15_asic_funcs; - if ((adev->pdev->device == 0x1636) || - (adev->pdev->device == 0x164c)) - adev->apu_flags |= AMD_APU_IS_RENOIR; - else - adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; if (adev->apu_flags & AMD_APU_IS_RENOIR) adev->external_rev_id = adev->rev_id + 0x91; @@ -1489,8 +1353,6 @@ static int soc15_common_early_init(void *handle) adev->asic_funcs = &vega20_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | - AMD_CG_SUPPORT_GFX_CGCG | - 
AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | @@ -1521,9 +1383,6 @@ static int soc15_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - if (adev->hdp.funcs->reset_ras_error_count) - adev->hdp.funcs->reset_ras_error_count(adev); - if (adev->nbio.ras_funcs && adev->nbio.ras_funcs->ras_late_init) r = adev->nbio.ras_funcs->ras_late_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 14bd794bbea6..8a9ca87d8663 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -27,28 +27,51 @@ /* Register Access Macros */ #define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) +#define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_wreg) ? \ + adev->gfx.rlc.funcs->sriov_wreg(adev, reg, value, flag, hwip) : \ + WREG32(reg, value)) + +#define __RREG32_SOC15_RLC__(reg, flag, hwip) \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_rreg) ? \ + adev->gfx.rlc.funcs->sriov_rreg(adev, reg, flag, hwip) : \ + RREG32(reg)) + #define WREG32_FIELD15(ip, idx, reg, field, val) \ - WREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ - (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ - & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ + (__RREG32_SOC15_RLC__( \ + adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ + 0, ip##_HWIP) & \ + ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \ + 0, ip##_HWIP) #define RREG32_SOC15(ip, inst, reg) \ - RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ + 0, ip##_HWIP) + +#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP) #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ - RREG32_NO_KIQ(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ + AMDGPU_REGS_NO_KIQ, ip##_HWIP) #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ - RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset) + __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, 0, ip##_HWIP) #define WREG32_SOC15(ip, inst, reg, value) \ - WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value) + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \ + value, 0, ip##_HWIP) + +#define WREG32_SOC15_IP(ip, reg, value) \ + __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ - WREG32_NO_KIQ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value) + __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ + value, AMDGPU_REGS_NO_KIQ, ip##_HWIP) #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ - WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \ + value, 0, ip##_HWIP) #define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \ ({ int ret = 0; \ @@ -77,12 +100,7 @@ }) #define 
WREG32_RLC(reg, value) \ - do { \ - if (adev->gfx.rlc.funcs->rlcg_wreg) \ - adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, 0); \ - else \ - WREG32(reg, value); \ - } while (0) + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP) #define WREG32_RLC_EX(prefix, reg, value) \ do { \ @@ -108,24 +126,19 @@ } \ } while (0) +/* shadow the registers in the callback function */ #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ - WREG32_RLC((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value) + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP) +/* for GC only */ #define RREG32_RLC(reg) \ - (adev->gfx.rlc.funcs->rlcg_rreg ? \ - adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, 0) : RREG32(reg)) - -#define WREG32_RLC_NO_KIQ(reg, value) \ - do { \ - if (adev->gfx.rlc.funcs->rlcg_wreg) \ - adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, AMDGPU_REGS_NO_KIQ); \ - else \ - WREG32_NO_KIQ(reg, value); \ - } while (0) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP) + +#define WREG32_RLC_NO_KIQ(reg, value, hwip) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip) -#define RREG32_RLC_NO_KIQ(reg) \ - (adev->gfx.rlc.funcs->rlcg_rreg ? \ - adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, AMDGPU_REGS_NO_KIQ) : RREG32_NO_KIQ(reg)) +#define RREG32_RLC_NO_KIQ(reg, hwip) \ + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip) #define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \ do { \ @@ -146,12 +159,12 @@ } while (0) #define RREG32_SOC15_RLC(ip, inst, reg) \ - RREG32_RLC(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP) #define WREG32_SOC15_RLC(ip, inst, reg, value) \ do { \ uint32_t target_reg = adev->reg_offset[ip##_HWIP][0][reg##_BASE_IDX] + reg;\ - WREG32_RLC(target_reg, value); \ + __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP); \ } while (0) #define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \ @@ -161,14 +174,16 @@ } while (0) #define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \ - WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ - (RREG32_RLC(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ - & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ + (__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ + AMDGPU_REGS_RLC, ip##_HWIP) & \ + ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \ + AMDGPU_REGS_RLC, ip##_HWIP) #define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ - WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value) + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP) #define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \ - RREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset)) + __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 745ed0fba1ed..0f214a398dd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input 
{ uint64_t value; // method if error injection. i.e persistent, coherent etc. }; +struct ta_ras_init_flags +{ + uint8_t poison_mode_en; + uint8_t dgpu_mode; +}; + struct ta_ras_output_flags { uint8_t ras_init_success_flag; @@ -115,6 +121,7 @@ struct ta_ras_output_flags /* Common input structure for RAS callbacks */ /**********************************************************/ union ta_ras_cmd_input { + struct ta_ras_init_flags init_flags; struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h index ac2c27b7630c..da815a93d46e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -33,7 +33,8 @@ enum ta_command_xgmi { TA_COMMAND_XGMI__GET_NODE_ID = 0x01, TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, - TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04 + TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04, + TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B }; /* XGMI related enumerations */ @@ -75,6 +76,11 @@ struct ta_xgmi_node_info { enum ta_xgmi_assigned_sdma_engine sdma_engine; }; +struct ta_xgmi_peer_link_info { + uint64_t node_id; + uint8_t num_links; +}; + struct ta_xgmi_cmd_initialize_output { uint32_t status; }; @@ -97,6 +103,11 @@ struct ta_xgmi_cmd_get_topology_info_output { struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; +struct ta_xgmi_cmd_get_peer_link_info_output { + uint32_t num_nodes; + struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + struct ta_xgmi_cmd_set_topology_info_input { uint32_t num_nodes; struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; @@ -115,6 +126,7 @@ union ta_xgmi_cmd_output { struct ta_xgmi_cmd_get_node_id_output get_node_id; struct ta_xgmi_cmd_get_hive_id_output get_hive_id; struct ta_xgmi_cmd_get_topology_info_output get_topology_info; + struct ta_xgmi_cmd_get_peer_link_info_output get_link_info; }; /**********************************************************/ @@ -122,7 +134,8 @@ struct ta_xgmi_shared_memory { uint32_t cmd_id; uint32_t resp_id; enum ta_xgmi_status xgmi_status; - uint32_t reserved; + uint8_t flag_extend_link_record; + uint8_t reserved0[3]; union ta_xgmi_cmd_input xgmi_in_message; union ta_xgmi_cmd_output xgmi_out_message; }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 249fcbee7871..b08905d1c00f 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -312,8 +312,7 @@ static int tonga_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 3a8f787374c0..bb30336b1e8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -28,6 +28,21 @@ #include "umc/umc_6_7_0_offset.h" #include "umc/umc_6_7_0_sh_mask.h" +const uint32_t + umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = { + {28, 20, 24, 16, 12, 4, 8, 0}, + {6, 30, 2, 26, 22, 14, 18, 10}, + {19, 11, 15, 7, 3, 27, 31, 23}, + {9, 1, 5, 29, 25, 17, 21, 13} +}; +const uint32_t + 
umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = { + {19, 11, 15, 7, 3, 27, 31, 23}, + {9, 1, 5, 29, 25, 17, 21, 13}, + {28, 20, 24, 16, 12, 4, 8, 0}, + {6, 30, 2, 26, 22, 14, 18, 10}, +}; + static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index 4eb85f247e96..57f2557e7aca 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -23,6 +23,9 @@ #ifndef __UMC_V6_7_H__ #define __UMC_V6_7_H__ +#include "soc15_common.h" +#include "amdgpu.h" + /* EccErrCnt max value */ #define UMC_V6_7_CE_CNT_MAX 0xffff /* umc ce interrupt threshold */ @@ -32,6 +35,18 @@ #define UMC_V6_7_INST_DIST 0x40000 +/* number of umc channel instance with memory map register access */ +#define UMC_V6_7_UMC_INSTANCE_NUM 4 +/* number of umc instance with memory map register access */ +#define UMC_V6_7_CHANNEL_INSTANCE_NUM 8 +/* total channel instances in one umc block */ +#define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM) +/* UMC regiser per channel offset */ +#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400 extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs; +extern const uint32_t + umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; +extern const uint32_t + umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index 89d20adfa001..af59a35788e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -234,7 +234,7 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, err_addr &= ~((0x1ULL << lsb) - 1); /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + retired_page = ADDR_OF_4KB_BLOCK(err_addr) | ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(err_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 284447d7a579..7232241e3bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -340,7 +340,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev) /* enable VCPU clock */ WREG32(mmUVD_VCPU_CNTL, 1 << 9); - /* disable interupt */ + /* disable interrupt */ WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1)); #ifdef __BIG_ENDIAN @@ -405,7 +405,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev) return r; } - /* enable interupt */ + /* enable interrupt */ WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1)); WREG32_P(mmUVD_STATUS, 0, ~(1<<2)); @@ -698,6 +698,30 @@ static int uvd_v3_1_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). 
+ */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v3_1_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index a301518e4957..52d6de969f46 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -212,6 +212,30 @@ static int uvd_v4_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v4_2_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index a4d5bd21c83c..db6d06758e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -210,6 +210,30 @@ static int uvd_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v5_0_stop(adev); @@ -224,7 +248,6 @@ static int uvd_v5_0_suspend(void *handle) r = uvd_v5_0_hw_fini(adev); if (r) return r; - uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); return amdgpu_uvd_suspend(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index cf3803f8f075..bc571833632e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -543,6 +543,30 @@ static int uvd_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). 
+ */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v6_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 0cd98fcb1f9f..b6e82d75561f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -363,6 +363,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) error: dma_fence_put(fence); + amdgpu_bo_unpin(bo); amdgpu_bo_unreserve(bo); amdgpu_bo_unref(&bo); return r; @@ -605,6 +606,30 @@ static int uvd_v7_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (!amdgpu_sriov_vf(adev)) uvd_v7_0_stop(adev); else { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c7d28c169be5..b70c17f0c52e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -477,6 +477,31 @@ static int vce_v2_0_hw_init(void *handle) static int vce_v2_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->vce.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_vce(adev, false); + } else { + amdgpu_asic_set_vce_clocks(adev, 0, 0); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 3b82fb289ef6..9de66893ccd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -490,6 +490,29 @@ static int vce_v3_0_hw_fini(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). 
+ */ + cancel_delayed_work_sync(&adev->vce.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_vce(adev, false); + } else { + amdgpu_asic_set_vce_clocks(adev, 0, 0); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); + } + r = vce_v3_0_wait_for_idle(handle); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 8e238dea7bef..fec902b800c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -25,6 +25,7 @@ */ #include <linux/firmware.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_vce.h" @@ -541,6 +542,29 @@ static int vce_v4_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->vce.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_vce(adev, false); + } else { + amdgpu_asic_set_vce_clocks(adev, 0, 0); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); + } + if (!amdgpu_sriov_vf(adev)) { /* vce_v4_0_wait_for_idle(handle); */ vce_v4_0_stop(adev); @@ -555,16 +579,19 @@ static int vce_v4_0_hw_fini(void *handle) static int vce_v4_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; + int r, idx; if (adev->vce.vcpu_bo == NULL) return 0; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); - void *ptr = adev->vce.cpu_addr; + if (drm_dev_enter(&adev->ddev, &idx)) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + void *ptr = adev->vce.cpu_addr; - memcpy_fromio(adev->vce.saved_bo, ptr, size); + memcpy_fromio(adev->vce.saved_bo, ptr, size); + } + drm_dev_exit(idx); } r = vce_v4_0_hw_fini(adev); @@ -577,16 +604,20 @@ static int vce_v4_0_suspend(void *handle) static int vce_v4_0_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; + int r, idx; if (adev->vce.vcpu_bo == NULL) return -EINVAL; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); - void *ptr = adev->vce.cpu_addr; - memcpy_toio(ptr, adev->vce.saved_bo, size); + if (drm_dev_enter(&adev->ddev, &idx)) { + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + void *ptr = adev->vce.cpu_addr; + + memcpy_toio(ptr, adev->vce.saved_bo, size); + drm_dev_exit(idx); + } } else { r = amdgpu_vce_resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 27b1ced145d2..121ee9f2b8d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -119,7 +119,7 @@ static int vcn_v1_0_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); 
} r = amdgpu_vcn_resume(adev); @@ -769,7 +769,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) } /** - * vcn_v1_0_start - start VCN block + * vcn_v1_0_start_spg_mode - start VCN block * * @adev: amdgpu_device pointer * @@ -1105,7 +1105,7 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) } /** - * vcn_v1_0_stop - stop VCN block + * vcn_v1_0_stop_spg_mode - stop VCN block * * @adev: amdgpu_device pointer * diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8af567c546db..f4686e918e0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -122,7 +122,7 @@ static int vcn_v2_0_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 888b17d84691..e0c0c3734432 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -152,7 +152,7 @@ static int vcn_v2_5_sw_init(void *handle) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 3b23de996db2..3d18aab88b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -34,6 +34,8 @@ #include "vcn/vcn_3_0_0_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" +#include <drm/drm_drv.h> + #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f #define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 @@ -85,6 +87,7 @@ static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); static int vcn_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; if (amdgpu_sriov_vf(adev)) { adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; @@ -94,7 +97,6 @@ static int vcn_v3_0_early_init(void *handle) } else { if (adev->asic_type == CHIP_SIENNA_CICHLID) { u32 harvest; - int i; adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { @@ -110,7 +112,10 @@ static int vcn_v3_0_early_init(void *handle) } else adev->vcn.num_vcn_inst = 1; - adev->vcn.num_enc_rings = 2; + if (adev->asic_type == CHIP_BEIGE_GOBY) + adev->vcn.num_enc_rings = 0; + else + adev->vcn.num_enc_rings = 2; } vcn_v3_0_set_dec_ring_funcs(adev); @@ -152,7 +157,7 @@ static int vcn_v3_0_sw_init(void *handle) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); @@ -268,16 +273,20 @@ static int vcn_v3_0_sw_init(void *handle) static int vcn_v3_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, r; + int i, r, idx; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_fw_shared *fw_shared; + if (drm_dev_enter(&adev->ddev, &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) - 
continue; - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; - fw_shared->present_flag_0 = 0; - fw_shared->sw_ring.is_enabled = false; + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sw_ring.is_enabled = false; + } + + drm_dev_exit(idx); } if (amdgpu_sriov_vf(adev)) @@ -316,18 +325,26 @@ static int vcn_v3_0_hw_init(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - if (ring->sched.ready) { + if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) { + ring->sched.ready = false; + dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); + } else { ring->wptr = 0; ring->wptr_old = 0; vcn_v3_0_dec_ring_set_wptr(ring); + ring->sched.ready = true; } for (j = 0; j < adev->vcn.num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; - if (ring->sched.ready) { + if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { + ring->sched.ready = false; + dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); + } else { ring->wptr = 0; ring->wptr_old = 0; vcn_v3_0_enc_ring_set_wptr(ring); + ring->sched.ready = true; } } } @@ -1254,23 +1271,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) fw_shared->rb.wptr = lower_32_bits(ring->wptr); fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); - fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); - fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + if (adev->asic_type != CHIP_BEIGE_GOBY) { + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); 
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + } } return 0; @@ -1293,8 +1312,6 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) uint32_t table_size; uint32_t size, size_dw; - bool is_vcn_ready; - struct mmsch_v3_0_cmd_direct_write direct_wt = { {0} }; struct mmsch_v3_0_cmd_direct_read_modify_write @@ -1486,30 +1503,6 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) } } - /* 6, check each VCN's init_status - * if it remains as 0, then this VCN is not assigned to current VF - * do not start ring for this VCN - */ - size = sizeof(struct mmsch_v3_0_init_header); - table_loc = (uint32_t *)table->cpu_addr; - memcpy(&header, (void *)table_loc, size); - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - is_vcn_ready = (header.inst[i].init_status == 1); - if (!is_vcn_ready) - DRM_INFO("VCN(%d) engine is disabled by hypervisor\n", i); - - ring = &adev->vcn.inst[i].ring_dec; - ring->sched.ready = is_vcn_ready; - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; - ring->sched.ready = is_vcn_ready; - } - } - return 0; } @@ -1650,31 +1643,33 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); - /* Restore */ - fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; - fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - ring->wptr = 0; - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[inst_idx].ring_enc[1]; - ring->wptr = 0; - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - /* restore wptr/rptr with pointers saved in FW shared memory*/ - WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr); - WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr); + if (adev->asic_type != CHIP_BEIGE_GOBY) { + /* Restore */ + fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + ring->wptr = 0; + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + 
fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + ring->wptr = 0; + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + /* restore wptr/rptr with pointers saved in FW shared memory*/ + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr); + } /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), @@ -2131,7 +2126,8 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs; adev->vcn.inst[i].ring_enc[j].me = i; } - DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); + if (adev->vcn.num_enc_rings > 0) + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index ca8efa5c6978..a9ca6988009e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, tmp = RREG32(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1); /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 
1 : 0)); @@ -513,11 +514,7 @@ static int vega10_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); - amdgpu_ih_ring_fini(adev, &adev->irq.ih2); - amdgpu_ih_ring_fini(adev, &adev->irq.ih1); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 8a122b413bf5..f51dfc38ac65 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -565,11 +565,7 @@ static int vega20_ih_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); - amdgpu_ih_ring_fini(adev, &adev->irq.ih2); - amdgpu_ih_ring_fini(adev, &adev->irq.ih1); - amdgpu_ih_ring_fini(adev, &adev->irq.ih); + amdgpu_irq_fini_sw(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 735ebbd1148f..fe9a7cc8d9eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -77,7 +77,7 @@ #if defined(CONFIG_DRM_AMD_ACP) #include "amdgpu_acp.h" #endif -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "mxgpu_vi.h" #include "amdgpu_dm.h" @@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev) bool bL1SS = false; bool bClkReqSupport = true; - if (amdgpu_aspm != 1) + if (!amdgpu_aspm) return; if (adev->flags & AMD_IS_APU || @@ -2102,7 +2102,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); break; case CHIP_FIJI: amdgpu_device_ip_block_add(adev, &vi_common_ip_block); @@ -2112,7 +2112,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2132,7 +2132,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2155,7 +2155,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2173,7 +2173,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, 
&pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2194,7 +2194,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c b/drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c new file mode 100644 index 000000000000..3d89421275ed --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c @@ -0,0 +1,51 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "yellow_carp_offset.h" + +int yellow_carp_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the block needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + } + return 0; +} |
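
Note on the video codec table rework earlier in this series: the verbose struct amdgpu_video_codec_info initializers are collapsed into one-line codec_info_build() entries. The macro definition itself is not part of the hunks shown here; a minimal sketch of the expansion it presumably performs, reconstructed from the fields it replaces (treat the exact definition as an assumption):

	/*
	 * Sketch only -- assumed expansion of codec_info_build(), inferred from
	 * the struct amdgpu_video_codec_info initializers it replaces above.
	 */
	#define codec_info_build(type, width, height, level)	\
		.codec_type = (type),				\
		.max_width = (width),				\
		.max_height = (height),				\
		.max_pixels_per_frame = (width) * (height),	\
		.max_level = (level),

Under that assumption an entry such as

	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},

carries the same information as the six-field initializer it shortens.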
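
Note on the soc15_common.h rework: every SOC15 accessor now funnels through __RREG32_SOC15_RLC__/__WREG32_SOC15_RLC__, which detour through the RLC hooks (adev->gfx.rlc.funcs->sriov_rreg/sriov_wreg) only when running as an SR-IOV VF with those hooks populated, and fall back to plain RREG32/WREG32 otherwise. Two call-site consequences follow directly from the hunks shown; the enclosing function below is an illustrative assumption, not code from this patch:

	/* Illustrative call sites only -- the enclosing function is made up. */
	static void example_gc_access(struct amdgpu_device *adev, u32 reg, u32 val)
	{
		/*
		 * New helpers for registers addressed by absolute offset: they
		 * route through the GC RLC path on a VF, plain MMIO otherwise.
		 */
		u32 tmp = RREG32_SOC15_IP(GC, reg);

		WREG32_SOC15_IP(GC, reg, tmp | val);

		/*
		 * The NO_KIQ RLC accessors gained an hwip argument, so existing
		 * callers must now name the IP block explicitly.
		 */
		WREG32_RLC_NO_KIQ(reg, val, GC_HWIP);	/* was WREG32_RLC_NO_KIQ(reg, val) */
		tmp = RREG32_RLC_NO_KIQ(reg, GC_HWIP);	/* was RREG32_RLC_NO_KIQ(reg) */
	}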