summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
diff options
context:
space:
mode:
authorVictor Skvortsov <victor.skvortsov@amd.com>2024-10-30 10:18:00 -0400
committerAlex Deucher <alexander.deucher@amd.com>2024-11-11 11:55:42 -0500
commit84a2947ecc85c67f433f2cc2186e54cdb9047b61 (patch)
tree4671794bdd9310925a70b1a967cdf0047ec9d4fa /drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
parent907fec2dfd061ca422d8b121f4af1b6062e098ba (diff)
drm/amdgpu: Implement virt req_ras_err_count
Enable RAS late init if VF RAS Telemetry is supported. When enabled, the VF can use this interface to query total RAS error counts from the host. The VF FB access may abruptly end due to a fatal error, therefore the VF must cache and sanitize the input. The Host allows 15 Telemetry messages every 60 seconds, afterwhich the host will ignore any more in-coming telemetry messages. The VF will rate limit its msg calling to once every 5 seconds (12 times in 60 seconds). While the VF is rate limited, it will continue to report the last good cached data. v2: Flip generate report & update statistics order for VF Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com> Acked-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Zhigang Luo <zhigang.luo@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h15
1 files changed, 15 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f0ff84add692..5381b8d596e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -104,6 +104,7 @@ struct amdgpu_virt_ops {
struct amdgpu_virt_fw_reserve {
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
+ void *ras_telemetry;
unsigned int checksum_key;
};
@@ -138,6 +139,7 @@ enum AMDGIM_FEATURE_FLAG {
/* MES info */
AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
+ AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
};
enum AMDGIM_REG_ACCESS_FLAG {
@@ -280,6 +282,10 @@ struct amdgpu_virt {
struct mutex rlcg_reg_lock;
union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+
+ struct ratelimit_state ras_telemetry_rs;
+ struct amd_sriov_ras_telemetry_error_count count_cache;
};
struct amdgpu_video_codec_info;
@@ -327,6 +333,12 @@ struct amdgpu_video_codec_info;
#define amdgpu_sriov_ras_caps_en(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)
+#define amdgpu_sriov_ras_telemetry_en(adev) \
+(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry)
+
+#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
+(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
+
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
@@ -391,4 +403,7 @@ bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
bool write, u32 *rlcg_flag);
u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data);
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
#endif