diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 40 | 
1 files changed, 29 insertions, 11 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 63dfcc98152d..3ab8a88789c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -34,6 +34,7 @@  #include "amdgpu_atomfirmware.h"  #include "amdgpu_xgmi.h"  #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" +#include "nbio_v4_3.h"  #include "atom.h"  #include "amdgpu_reset.h" @@ -2428,6 +2429,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)  			else  				adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |  							1 << AMDGPU_RAS_BLOCK__JPEG); + +			/* +			 * XGMI RAS is not supported if xgmi num physical nodes +			 * is zero +			 */ +			if (!adev->gmc.xgmi.num_physical_nodes) +				adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);  		} else {  			dev_info(adev->dev, "SRAM ECC is not presented.\n");  		} @@ -2554,21 +2562,34 @@ int amdgpu_ras_init(struct amdgpu_device *adev)  	/* initialize nbio ras function ahead of any other  	 * ras functions so hardware fatal error interrupt  	 * can be enabled as early as possible */ -	switch (adev->asic_type) { -	case CHIP_VEGA20: -	case CHIP_ARCTURUS: -	case CHIP_ALDEBARAN: -		if (!adev->gmc.xgmi.connected_to_cpu) { +	switch (adev->ip_versions[NBIO_HWIP][0]) { +	case IP_VERSION(7, 4, 0): +	case IP_VERSION(7, 4, 1): +	case IP_VERSION(7, 4, 4): +		if (!adev->gmc.xgmi.connected_to_cpu)  			adev->nbio.ras = &nbio_v7_4_ras; -			amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); -			adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; -		} +		break; +	case IP_VERSION(4, 3, 0): +		if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF)) +			/* unlike other generation of nbio ras, +			 * nbio v4_3 only support fatal error interrupt +			 * to inform software that DF is freezed due to +			 * system fatal error event. driver should not +			 * enable nbio ras in such case. Instead, +			 * check DF RAS */ +			adev->nbio.ras = &nbio_v4_3_ras;  		break;  	default:  		/* nbio ras is not available */  		break;  	} +	/* nbio ras block needs to be enabled ahead of other ras blocks +	 * to handle fatal error */ +	r = amdgpu_nbio_ras_sw_init(adev); +	if (r) +		return r; +  	if (adev->nbio.ras &&  	    adev->nbio.ras->init_ras_controller_interrupt) {  		r = adev->nbio.ras->init_ras_controller_interrupt(adev); @@ -3073,9 +3094,6 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,  	if (!adev || !ras_block_obj)  		return -EINVAL; -	if (!amdgpu_ras_asic_supported(adev)) -		return 0; -  	ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);  	if (!ras_node)  		return -ENOMEM; | 
