summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorWenhui Sheng <Wenhui.Sheng@amd.com>2020-07-13 15:14:30 +0800
committerAlex Deucher <alexander.deucher@amd.com>2020-07-15 12:41:47 -0400
commitbb5c7235eaafb4e2f957e9f0f71a187db5cf525a (patch)
treec2f200e6dbd22ed580a971c44125448d4eeaa1e3 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parentea8139d8d59bd6f014b317e7423345169a56fe49 (diff)
drm/amdgpu: RAS emergency restart logic refine
If we are in RAS triggered situation and BACO isn't support, emergency restart is needed, and this code is only needed for some specific cases(vega20 with given smu fw version). After we add smu mode1 reset for sienna cichlid, we need to share AMD_RESET_METHOD_MODE1 with psp mode1 reset, so in amdgpu_device_gpu_recover, we need differentiate which mode1 reset we are using, then decide if it's a full reset and then decide if emergency restart is needed, the logic will become much more complex. After discussion with Hawking, move emergency restart logic to an independent function. Signed-off-by: Likun Gao <Likun.Gao@amd.com> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c23
1 files changed, 12 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 81ca92127c00..3bf4ca2c5b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4245,18 +4245,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
- bool in_ras_intr = amdgpu_ras_intr_triggered();
- bool use_baco =
- (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
- true : false;
+ bool need_emergency_restart = false;
bool audio_suspended = false;
+ /**
+ * Special case: RAS triggered and full reset isn't supported
+ */
+ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
/*
* Flush RAM to disk so that after reboot
* the user can read log and see why the system rebooted.
*/
- if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
-
+ if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
DRM_WARN("Emergency reboot.");
ksys_sync_helper();
@@ -4264,7 +4265,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
dev_info(adev->dev, "GPU %s begin!\n",
- (in_ras_intr && !use_baco) ? "jobs stop":"reset");
+ need_emergency_restart ? "jobs stop":"reset");
/*
* Here we trylock to avoid chain of resets executing from
@@ -4336,7 +4337,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_fbdev_set_suspend(tmp_adev, 1);
/* disable ras on ALL IPs */
- if (!(in_ras_intr && !use_baco) &&
+ if (!need_emergency_restart &&
amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
@@ -4348,12 +4349,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
- if (in_ras_intr && !use_baco)
+ if (need_emergency_restart)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
- if (in_ras_intr && !use_baco)
+ if (need_emergency_restart)
goto skip_sched_resume;
/*
@@ -4430,7 +4431,7 @@ skip_hw_reset:
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/*unlock kfd: SRIOV would do it separately */
- if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);