diff options
author | Dave Airlie <airlied@redhat.com> | 2023-10-25 10:54:10 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-10-25 10:54:22 +1000 |
commit | 0ecf4aa32b7896b9160688bdbd20153dc06a50fb (patch) | |
tree | 085fa21f58e9cfdb8142b81068af8051810e6033 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |
parent | 11ae5eb516b656e8a0e4efbea90ea24c152a346d (diff) | |
parent | 5b2c54e0d0ea09f7a3b500510731878326e1117e (diff) |
Merge tag 'amd-drm-next-6.7-2023-10-20' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.7-2023-10-20:
amdgpu:
- SMU 13 updates
- UMSCH updates
- DC MPO fixes
- RAS updates
- MES 11 fixes
- Fix possible memory leaks in error pathes
- GC 11.5 fixes
- Kernel doc updates
- PSP updates
- APU IMU fixes
- Misc code cleanups
- SMU 11 fixes
- OD fix
- Frame size warning fixes
- SR-IOV fixes
- NBIO 7.11 updates
- NBIO 7.7 updates
- XGMI fixes
- devcoredump updates
amdkfd:
- Misc code cleanups
- SVM fixes
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231020195043.4937-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 96 |
1 files changed, 8 insertions, 88 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0f98f720d9ca..9340e8dc0413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,8 +32,6 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> -#include <linux/devcoredump.h> -#include <generated/utsrelease.h> #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> @@ -3578,9 +3576,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (adev->asic_reset_res) goto fail; - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); } else { task_barrier_full(&hive->tb); @@ -5050,90 +5046,16 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) lockdep_assert_held(&adev->reset_domain->sem); - for (i = 0; i < adev->num_regs; i++) { - adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], - adev->reset_dump_reg_value[i]); - } - - return 0; -} - -#ifndef CONFIG_DEV_COREDUMP -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_dump_reg_list[i], - coredump->adev->reset_dump_reg_value[i]); - } + for (i = 0; i < adev->reset_info.num_regs; i++) { + adev->reset_info.reset_dump_reg_value[i] = + RREG32(adev->reset_info.reset_dump_reg_list[i]); - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; + trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], + adev->reset_info.reset_dump_reg_value[i]); } - coredump->reset_vram_lost = vram_lost; - - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); + return 0; } -#endif int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) @@ -5201,9 +5123,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (!r && amdgpu_ras_intr_triggered()) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); + amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB); } amdgpu_ras_intr_cleared(); |