diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 39 |
1 files changed, 28 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 20d51f6c9bb8..821ba2309dec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -434,6 +434,9 @@ static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev, } } + if (i == top->num_nodes) + return -EINVAL; + for (i = 0; i < top->num_nodes; i++) { for (j = 0; j < top->nodes[i].num_links; j++) /* node id in sysfs starts from 1 rather than 0 so +1 here */ @@ -1035,15 +1038,16 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } -static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, - struct aca_bank_report *report, void *data) +static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; const char *error_str; - u64 status; + u64 status, count; int ret, ext_error_code; - ret = aca_bank_info_decode(bank, &report->info); + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; @@ -1055,15 +1059,28 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc if (error_str) dev_info(adev->dev, "%s detected\n", error_str); - if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) || - (type == ACA_ERROR_TYPE_CE && ext_error_code == 6)) - report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); - return 0; + switch (type) { + case ACA_SMU_TYPE_UE: + if (ext_error_code != 0 && ext_error_code != 9) + count = 0ULL; + + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); + break; + case ACA_SMU_TYPE_CE: + count = ext_error_code == 6 ? count : 0ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); + break; + default: + return -EINVAL; + } + + return ret; } static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { - .aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report, + .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser, }; static const struct aca_info xgmi_v6_4_0_aca_info = { @@ -1429,7 +1446,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW); if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); @@ -1438,7 +1455,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, if (amdgpu_ras_intr_triggered()) return ret2; - ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT); if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); |