1 files changed, 26 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index bc699d680ce8..b4789dfc2bb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -863,8 +863,8 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
 	return 0;
 }
 
-int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
-				   void *ras_error_status)
+static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+					  void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	uint32_t sec_count = 0, ded_count = 0;
@@ -906,7 +906,7 @@ int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
 	return 0;
 }
 
-void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
+static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
 {
 	int i, j, k;
 
@@ -971,7 +971,8 @@ void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
 }
 
-int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
+				     void *inject_if)
 {
 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
 	int ret;
@@ -993,10 +994,10 @@ int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
 	return ret;
 }
 
-static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs =
+static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs =
 	{ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
 
-void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
+static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
 {
 	uint32_t i, j;
 	uint32_t reg_value;
@@ -1006,18 +1007,33 @@ void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
 
 	mutex_lock(&adev->grbm_idx_mutex);
 
-	for (i = 0; i < gfx_v9_4_rdrsp_status_regs.se_num; i++) {
-		for (j = 0; j < gfx_v9_4_rdrsp_status_regs.instance;
+	for (i = 0; i < gfx_v9_4_ea_err_status_regs.se_num; i++) {
+		for (j = 0; j < gfx_v9_4_ea_err_status_regs.instance;
 		     j++) {
 			gfx_v9_4_select_se_sh(adev, i, 0, j);
 			reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
-				gfx_v9_4_rdrsp_status_regs));
-			if (reg_value)
+				gfx_v9_4_ea_err_status_regs));
+			if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+			    REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+			    REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+				/* SDP read/write error/parity error in FUE_IS_FATAL mode
+				 * can cause system fatal error in Arcturus. Harvest the error
+				 * status before GPU reset */
 				dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
 						j, reg_value);
+			}
 		}
 	}
 
 	gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
+
+const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs = {
+	.ras_late_init = amdgpu_gfx_ras_late_init,
+	.ras_fini = amdgpu_gfx_ras_fini,
+	.ras_error_inject = gfx_v9_4_ras_error_inject,
+	.query_ras_error_count = gfx_v9_4_query_ras_error_count,
+	.reset_ras_error_count = gfx_v9_4_reset_ras_error_count,
+	.query_ras_error_status = gfx_v9_4_query_ras_error_status,
+};