From 5740682e66cef57626a328d237698cad329c0449 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 Oct 2017 16:37:02 +0800 Subject: drm/amdgpu:implement new GPU recover(v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1,new imple names amdgpu_gpu_recover which gives more hint on what it does compared with gpu_reset 2,gpu_recover unify bare-metal and SR-IOV, only the asic reset part is implemented differently 3,gpu_recover will increase hang job karma and mark its entity/context as guilty if exceeds limit V2: 4,in scheduler main routine the job from guilty context will be immedialy fake signaled after it poped from queue and its fence be set with "-ECANCELED" error 5,in scheduler recovery routine all jobs from the guilty entity would be dropped 6,in run_job() routine the real IB submission would be skipped if @skip parameter equales true or there was VRAM lost occured. V3: 7,replace deprecated gpu reset, use new gpu recover Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 88fa19b1a802..5714b7e8cb09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -178,6 +178,10 @@ extern int amdgpu_cik_support; #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 +/* GPU RESET flags */ +#define AMDGPU_RESET_INFO_VRAM_LOST (1 << 0) +#define AMDGPU_RESET_INFO_FULLRESET (1 << 1) + struct amdgpu_device; struct amdgpu_ib; struct amdgpu_cs_parser; @@ -1833,7 +1837,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) /* Common functions */ -int amdgpu_gpu_reset(struct amdgpu_device *adev); +int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); bool amdgpu_need_backup(struct amdgpu_device *adev); void amdgpu_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_need_post(struct amdgpu_device *adev); -- cgit v1.2.3-70-g09d2