diff options
author | Lijo Lazar <lijo.lazar@amd.com> | 2024-02-22 14:16:57 +0530 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-02-26 11:14:24 -0500 |
commit | 1b6ef74b2b03b54776778476f8adf87dd4f8beb1 (patch) | |
tree | 94795ef19a86666ce2be38638096f85f27aecb2c /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | 34b811a281bab42d09592fdaa6885f4f41352bd3 (diff) |
drm/amdgpu: Add fatal error detected flag
For a RAS error that needs a full reset to recover, set the fatal error
status. Clear the status once the device is reset.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Asad Kamal <asad.kamal@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 46f3d1013e8c..2c94de305c69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2439,6 +2439,18 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /* For any RAS error that needs a full reset to + * recover, set the fatal error status + */ + if (hive) { + list_for_each_entry(remote_adev, + &hive->device_list, + gmc.xgmi.head) + amdgpu_ras_set_fed(remote_adev, + true); + } else { + amdgpu_ras_set_fed(adev, true); + } psp_fatal_error_recovery_quirk(&adev->psp); } } @@ -3440,6 +3452,26 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (!ras) + return false; + + return atomic_read(&ras->fed); +} + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + atomic_set(&ras->fed, !!status); +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { |