From 583681d4a417a67afbcc3664d31fb58d0e59aeae Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 6 Mar 2024 22:59:34 +0530 Subject: drm/amdgpu: add vm fault information to devcoredump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add page fault information to the devcoredump. Output of devcoredump: **** AMDGPU Device Coredump **** version: 1 kernel: 6.7.0-amd-staging-drm-next module: amdgpu time: 29.725011811 process_name: soft_recovery_p PID: 1720 Ring timed out details IP Type: 0 Ring Name: gfx_0.0.0 [gfxhub] Page fault observed Faulty page starting at address: 0x0000000000000000 Protection fault status register: 0x301031 VRAM is lost due to GPU reset! Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 147100c27c2d..6d059f853adc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -203,6 +203,18 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, coredump->ring->name); } + if (coredump->adev) { + struct amdgpu_vm_fault_info *fault_info = + &coredump->adev->vm_manager.fault_info; + + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ? "mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", + fault_info->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", + fault_info->status); + } + if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); if (coredump->adev->reset_info.num_regs) { -- cgit v1.2.3-70-g09d2