diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 32 | 
1 files changed, 25 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2de9309a4193..dac202ae864d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -197,6 +197,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
 	if (amdgpu_ras_query_error_status(obj->adev, &info))
 		return -EINVAL;

+	/* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
+	if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+	    obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+	}
+
 	s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
 			"ue", info.ue_count,
 			"ce", info.ce_count);
@@ -550,9 +557,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 	if (amdgpu_ras_query_error_status(obj->adev, &info))
 		return -EINVAL;

-	if (obj->adev->asic_type == CHIP_ALDEBARAN) {
+	if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+	    obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
 		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
-			DRM_WARN("Failed to reset error counter and error status");
+			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
 	}

 	return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
@@ -1027,9 +1035,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		}
 	}

-	if (!amdgpu_persistent_edc_harvesting_supported(adev))
-		amdgpu_ras_reset_error_status(adev, info->head.block);
-
 	return 0;
 }

@@ -1149,6 +1154,12 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		if (res)
 			return res;

+		if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+		    adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+			if (amdgpu_ras_reset_error_status(adev, info.head.block))
+				dev_warn(adev->dev, "Failed to reset error counter and error status");
+		}
+
 		ce += info.ce_count;
 		ue += info.ue_count;
 	}
@@ -1792,6 +1803,12 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
 			continue;

 		amdgpu_ras_query_error_status(adev, &info);
+
+		if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+		    adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+			if (amdgpu_ras_reset_error_status(adev, info.head.block))
+				dev_warn(adev->dev, "Failed to reset error counter and error status");
+		}
 	}
 }

@@ -2278,8 +2295,9 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 	    !amdgpu_ras_asic_supported(adev))
 		return;

-	if (!(amdgpu_sriov_vf(adev) &&
-		(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))
+	/* If driver run on sriov guest side, only enable ras for aldebaran */
+	if (amdgpu_sriov_vf(adev) &&
+		adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
 		return;

 	if (!adev->gmc.xgmi.connected_to_cpu) {
