From 9953b72f9c9cb7733334753788faab33ccc4dc0a Mon Sep 17 00:00:00 2001
From: pding <Pixel.Ding@amd.com>
Date: Thu, 26 Oct 2017 09:30:38 +0800
Subject: drm/amdgpu: change redundant init logs to debug level

When this VF stays in exclusive mode for long, other VFs will be
impacted.

The redundant messages causes exclusive mode timeout when they're
redirected. That is a normal use case for cloud service to redirect
guest log to virtual serial port.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: pding <Pixel.Ding@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 47c5ce9807db..32590e4f9f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -232,7 +232,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 		int ret = pci_enable_msi(adev->pdev);
 		if (!ret) {
 			adev->irq.msi_enabled = true;
-			dev_info(adev->dev, "amdgpu: using MSI.\n");
+			dev_dbg(adev->dev, "amdgpu: using MSI.\n");
 		}
 	}
 
@@ -262,7 +262,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	DRM_INFO("amdgpu: irq initialized.\n");
+	DRM_DEBUG("amdgpu: irq initialized.\n");
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5740682e66cef57626a328d237698cad329c0449 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Wed, 25 Oct 2017 16:37:02 +0800
Subject: drm/amdgpu:implement new GPU recover(v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1,new imple names amdgpu_gpu_recover which gives more hint
on what it does compared with gpu_reset

2,gpu_recover unify bare-metal and SR-IOV, only the asic reset
part is implemented differently

3,gpu_recover will increase hang job karma and mark its entity/context
as guilty if exceeds limit

V2:

4,in scheduler main routine the job from guilty context  will be immedialy
fake signaled after it poped from queue and its fence be set with
"-ECANCELED" error

5,in scheduler recovery routine all jobs from the guilty entity would be
dropped

6,in run_job() routine the real IB submission would be skipped if @skip parameter
equales true or there was VRAM lost occured.

V3:

7,replace deprecated gpu reset, use new gpu recover

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 322 ++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c    |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |   1 -
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      |   2 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c      |   2 +-
 8 files changed, 166 insertions(+), 184 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 88fa19b1a802..5714b7e8cb09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -178,6 +178,10 @@ extern int amdgpu_cik_support;
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
 
+/* GPU RESET flags */
+#define AMDGPU_RESET_INFO_VRAM_LOST  (1 << 0)
+#define AMDGPU_RESET_INFO_FULLRESET  (1 << 1)
+
 struct amdgpu_device;
 struct amdgpu_ib;
 struct amdgpu_cs_parser;
@@ -1833,7 +1837,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 
 /* Common functions */
-int amdgpu_gpu_reset(struct amdgpu_device *adev);
+int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job);
 bool amdgpu_need_backup(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e521850e9409..e287eeda2dab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2827,163 +2827,172 @@ err:
 	return r;
 }
 
-/**
- * amdgpu_sriov_gpu_reset - reset the asic
+/*
+ * amdgpu_reset - reset ASIC/GPU for bare-metal or passthrough
  *
  * @adev: amdgpu device pointer
- * @job: which job trigger hang
+ * @reset_flags: output param tells caller the reset result
  *
- * Attempt the reset the GPU if it has hung (all asics).
- * for SRIOV case.
- * Returns 0 for success or an error on failure.
- */
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
+ * attempt to do soft-reset or full-reset and reinitialize Asic
+ * return 0 means successed otherwise failed
+*/
+static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags)
 {
-	int i, j, r = 0;
-	int resched;
-	struct amdgpu_bo *bo, *tmp;
-	struct amdgpu_ring *ring;
-	struct dma_fence *fence = NULL, *next = NULL;
+	bool need_full_reset, vram_lost = 0;
+	int r;
 
-	mutex_lock(&adev->virt.lock_reset);
-	atomic_inc(&adev->gpu_reset_counter);
-	adev->in_sriov_reset = true;
+	need_full_reset = amdgpu_need_full_reset(adev);
 
-	/* block TTM */
-	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+	if (!need_full_reset) {
+		amdgpu_pre_soft_reset(adev);
+		r = amdgpu_soft_reset(adev);
+		amdgpu_post_soft_reset(adev);
+		if (r || amdgpu_check_soft_reset(adev)) {
+			DRM_INFO("soft reset failed, will fallback to full reset!\n");
+			need_full_reset = true;
+		}
 
-	/* we start from the ring trigger GPU hang */
-	j = job ? job->ring->idx : 0;
+	}
 
-	/* block scheduler */
-	for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
-		ring = adev->rings[i % AMDGPU_MAX_RINGS];
-		if (!ring || !ring->sched.thread)
-			continue;
+	if (need_full_reset) {
+		r = amdgpu_suspend(adev);
 
-		kthread_park(ring->sched.thread);
+retry:
+		amdgpu_atombios_scratch_regs_save(adev);
+		r = amdgpu_asic_reset(adev);
+		amdgpu_atombios_scratch_regs_restore(adev);
+		/* post card */
+		amdgpu_atom_asic_init(adev->mode_info.atom_context);
 
-		if (job && j != i)
-			continue;
+		if (!r) {
+			dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
+			r = amdgpu_resume_phase1(adev);
+			if (r)
+				goto out;
 
-		/* here give the last chance to check if job removed from mirror-list
-		 * since we already pay some time on kthread_park */
-		if (job && list_empty(&job->base.node)) {
-			kthread_unpark(ring->sched.thread);
-			goto give_up_reset;
+			vram_lost = amdgpu_check_vram_lost(adev);
+			if (vram_lost) {
+				DRM_ERROR("VRAM is lost!\n");
+				atomic_inc(&adev->vram_lost_counter);
+			}
+
+			r = amdgpu_ttm_recover_gart(adev);
+			if (r)
+				goto out;
+
+			r = amdgpu_resume_phase2(adev);
+			if (r)
+				goto out;
+
+			if (vram_lost)
+				amdgpu_fill_reset_magic(adev);
 		}
+	}
 
-		if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit))
-			amd_sched_job_kickout(&job->base);
+out:
+	if (!r) {
+		amdgpu_irq_gpu_reset_resume_helper(adev);
+		r = amdgpu_ib_ring_tests(adev);
+		if (r) {
+			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
+			r = amdgpu_suspend(adev);
+			need_full_reset = true;
+			goto retry;
+		}
+	}
 
-		/* only do job_reset on the hang ring if @job not NULL */
-		amd_sched_hw_job_reset(&ring->sched, NULL);
+	if (reset_flags) {
+		if (vram_lost)
+			(*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
 
-		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
-		amdgpu_fence_driver_force_completion(ring);
+		if (need_full_reset)
+			(*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
 	}
 
-	/* request to take full control of GPU before re-initialization  */
-	if (job)
-		amdgpu_virt_reset_gpu(adev);
-	else
-		amdgpu_virt_request_full_gpu(adev, true);
+	return r;
+}
+
+/*
+ * amdgpu_reset_sriov - reset ASIC for SR-IOV vf
+ *
+ * @adev: amdgpu device pointer
+ * @reset_flags: output param tells caller the reset result
+ *
+ * do VF FLR and reinitialize Asic
+ * return 0 means successed otherwise failed
+*/
+static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, bool from_hypervisor)
+{
+	int r;
 
+	if (from_hypervisor)
+		r = amdgpu_virt_request_full_gpu(adev, true);
+	else
+		r = amdgpu_virt_reset_gpu(adev);
+	if (r)
+		return r;
 
 	/* Resume IP prior to SMC */
-	amdgpu_sriov_reinit_early(adev);
+	r = amdgpu_sriov_reinit_early(adev);
+	if (r)
+		goto error;
 
 	/* we need recover gart prior to run SMC/CP/SDMA resume */
 	amdgpu_ttm_recover_gart(adev);
 
 	/* now we are okay to resume SMC/CP/SDMA */
-	amdgpu_sriov_reinit_late(adev);
+	r = amdgpu_sriov_reinit_late(adev);
+	if (r)
+		goto error;
 
 	amdgpu_irq_gpu_reset_resume_helper(adev);
-
-	if (amdgpu_ib_ring_tests(adev))
+	r = amdgpu_ib_ring_tests(adev);
+	if (r)
 		dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
 
+error:
 	/* release full control of GPU after ib test */
 	amdgpu_virt_release_full_gpu(adev, true);
 
-	DRM_INFO("recover vram bo from shadow\n");
-
-	ring = adev->mman.buffer_funcs_ring;
-	mutex_lock(&adev->shadow_list_lock);
-	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
-		next = NULL;
-		amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
-		if (fence) {
-			r = dma_fence_wait(fence, false);
-			if (r) {
-				WARN(r, "recovery from shadow isn't completed\n");
-				break;
-			}
-		}
-
-		dma_fence_put(fence);
-		fence = next;
-	}
-	mutex_unlock(&adev->shadow_list_lock);
-
-	if (fence) {
-		r = dma_fence_wait(fence, false);
-		if (r)
-			WARN(r, "recovery from shadow isn't completed\n");
-	}
-	dma_fence_put(fence);
-
-	for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
-		ring = adev->rings[i % AMDGPU_MAX_RINGS];
-		if (!ring || !ring->sched.thread)
-			continue;
-
-		if (job && j != i) {
-			kthread_unpark(ring->sched.thread);
-			continue;
-		}
-
-		amd_sched_job_recovery(&ring->sched);
-		kthread_unpark(ring->sched.thread);
-	}
+	if (reset_flags) {
+		/* will get vram_lost from GIM in future, now all
+		 * reset request considered VRAM LOST
+		 */
+		(*reset_flags) |= ~AMDGPU_RESET_INFO_VRAM_LOST;
+		atomic_inc(&adev->vram_lost_counter);
 
-	drm_helper_resume_force_mode(adev->ddev);
-give_up_reset:
-	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
-	if (r) {
-		/* bad news, how to tell it to userspace ? */
-		dev_info(adev->dev, "GPU reset failed\n");
-	} else {
-		dev_info(adev->dev, "GPU reset successed!\n");
+		/* VF FLR or hotlink reset is always full-reset */
+		(*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
 	}
 
-	adev->in_sriov_reset = false;
-	mutex_unlock(&adev->virt.lock_reset);
 	return r;
 }
 
 /**
- * amdgpu_gpu_reset - reset the asic
+ * amdgpu_gpu_recover - reset the asic and recover scheduler
  *
  * @adev: amdgpu device pointer
+ * @job: which job trigger hang
  *
- * Attempt the reset the GPU if it has hung (all asics).
+ * Attempt to reset the GPU if it has hung (all asics).
  * Returns 0 for success or an error on failure.
  */
-int amdgpu_gpu_reset(struct amdgpu_device *adev)
+int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
 {
 	struct drm_atomic_state *state = NULL;
-	int i, r;
-	int resched;
-	bool need_full_reset, vram_lost = false;
+	uint64_t reset_flags = 0;
+	int i, r, resched;
 
 	if (!amdgpu_check_soft_reset(adev)) {
 		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
 		return 0;
 	}
 
+	dev_info(adev->dev, "GPU reset begin!\n");
+
+	mutex_lock(&adev->virt.lock_reset);
 	atomic_inc(&adev->gpu_reset_counter);
+	adev->in_sriov_reset = 1;
 
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
@@ -2997,69 +3006,26 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 
 		if (!ring || !ring->sched.thread)
 			continue;
+
+		/* only focus on the ring hit timeout if &job not NULL */
+		if (job && job->ring->idx != i)
+			continue;
+
 		kthread_park(ring->sched.thread);
-		amd_sched_hw_job_reset(&ring->sched, NULL);
+		amd_sched_hw_job_reset(&ring->sched, &job->base);
+
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
 
-	need_full_reset = amdgpu_need_full_reset(adev);
-
-	if (!need_full_reset) {
-		amdgpu_pre_soft_reset(adev);
-		r = amdgpu_soft_reset(adev);
-		amdgpu_post_soft_reset(adev);
-		if (r || amdgpu_check_soft_reset(adev)) {
-			DRM_INFO("soft reset failed, will fallback to full reset!\n");
-			need_full_reset = true;
-		}
-	}
-
-	if (need_full_reset) {
-		r = amdgpu_suspend(adev);
-
-retry:
-		amdgpu_atombios_scratch_regs_save(adev);
-		r = amdgpu_asic_reset(adev);
-		amdgpu_atombios_scratch_regs_restore(adev);
-		/* post card */
-		amdgpu_atom_asic_init(adev->mode_info.atom_context);
+	if (amdgpu_sriov_vf(adev))
+		r = amdgpu_reset_sriov(adev, &reset_flags, job ? false : true);
+	else
+		r = amdgpu_reset(adev, &reset_flags);
 
-		if (!r) {
-			dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
-			r = amdgpu_resume_phase1(adev);
-			if (r)
-				goto out;
-			vram_lost = amdgpu_check_vram_lost(adev);
-			if (vram_lost) {
-				DRM_ERROR("VRAM is lost!\n");
-				atomic_inc(&adev->vram_lost_counter);
-			}
-			r = amdgpu_ttm_recover_gart(adev);
-			if (r)
-				goto out;
-			r = amdgpu_resume_phase2(adev);
-			if (r)
-				goto out;
-			if (vram_lost)
-				amdgpu_fill_reset_magic(adev);
-		}
-	}
-out:
 	if (!r) {
-		amdgpu_irq_gpu_reset_resume_helper(adev);
-		r = amdgpu_ib_ring_tests(adev);
-		if (r) {
-			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
-			r = amdgpu_suspend(adev);
-			need_full_reset = true;
-			goto retry;
-		}
-		/**
-		 * recovery vm page tables, since we cannot depend on VRAM is
-		 * consistent after gpu full reset.
-		 */
-		if (need_full_reset && amdgpu_need_backup(adev)) {
+		if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
+			(reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) {
 			struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 			struct amdgpu_bo *bo, *tmp;
 			struct dma_fence *fence = NULL, *next = NULL;
@@ -3088,40 +3054,56 @@ out:
 			}
 			dma_fence_put(fence);
 		}
+
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 
 			if (!ring || !ring->sched.thread)
 				continue;
 
+			/* only focus on the ring hit timeout if &job not NULL */
+			if (job && job->ring->idx != i)
+				continue;
+
 			amd_sched_job_recovery(&ring->sched);
 			kthread_unpark(ring->sched.thread);
 		}
 	} else {
-		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i] && adev->rings[i]->sched.thread) {
-				kthread_unpark(adev->rings[i]->sched.thread);
-			}
+			struct amdgpu_ring *ring = adev->rings[i];
+
+			if (!ring || !ring->sched.thread)
+				continue;
+
+			/* only focus on the ring hit timeout if &job not NULL */
+			if (job && job->ring->idx != i)
+				continue;
+
+			kthread_unpark(adev->rings[i]->sched.thread);
 		}
 	}
 
 	if (amdgpu_device_has_dc_support(adev)) {
-		r = drm_atomic_helper_resume(adev->ddev, state);
+		if (drm_atomic_helper_resume(adev->ddev, state))
+			dev_info(adev->dev, "drm resume failed:%d\n", r);
 		amdgpu_dm_display_resume(adev);
-	} else
+	} else {
 		drm_helper_resume_force_mode(adev->ddev);
+	}
 
 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
+
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
-		dev_info(adev->dev, "GPU reset failed\n");
-	}
-	else {
-		dev_info(adev->dev, "GPU reset successed!\n");
+		dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+	} else {
+		dev_info(adev->dev, "GPU reset(%d) successed!\n",atomic_read(&adev->gpu_reset_counter));
 	}
 
 	amdgpu_vf_error_trans_all(adev);
+	adev->in_sriov_reset = 0;
+	mutex_unlock(&adev->virt.lock_reset);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index eda89dfdef5b..604ac03a42e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -694,25 +694,25 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 }
 
 /**
- * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset
+ * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
  *
  * Manually trigger a gpu reset at the next fence wait.
  */
-static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
+static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 
-	seq_printf(m, "gpu reset\n");
-	amdgpu_gpu_reset(adev);
+	seq_printf(m, "gpu recover\n");
+	amdgpu_gpu_recover(adev, NULL);
 
 	return 0;
 }
 
 static const struct drm_info_list amdgpu_debugfs_fence_list[] = {
 	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
-	{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
+	{"amdgpu_gpu_recover", &amdgpu_debugfs_gpu_recover, 0, NULL}
 };
 
 static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 32590e4f9f7a..c340774082ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
 						  reset_work);
 
 	if (!amdgpu_sriov_vf(adev))
-		amdgpu_gpu_reset(adev);
+		amdgpu_gpu_recover(adev, NULL);
 }
 
 /* Disable *all* interrupts */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0a90c768dbc1..18770a880393 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -37,10 +37,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 		  atomic_read(&job->ring->fence_drv.last_seq),
 		  job->ring->fence_drv.sync_seq);
 
-	if (amdgpu_sriov_vf(job->adev))
-		amdgpu_sriov_gpu_reset(job->adev, job);
-	else
-		amdgpu_gpu_reset(job->adev);
+	amdgpu_gpu_recover(job->adev, job);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d149aca71a44..20bdb8fb0b8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -288,7 +288,6 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
 int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
 int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f91aab38637c..c32d0b0868e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -254,7 +254,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	}
 
 	/* Trigger recovery due to world switch failure */
-	amdgpu_sriov_gpu_reset(adev, NULL);
+	amdgpu_gpu_recover(adev, NULL);
 }
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 27b03c773b1b..818ec0fe2f51 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -519,7 +519,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
 	}
 
 	/* Trigger recovery due to world switch failure */
-	amdgpu_sriov_gpu_reset(adev, NULL);
+	amdgpu_gpu_recover(adev, NULL);
 }
 
 static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
-- 
cgit v1.2.3-70-g09d2