summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-11-07 17:10:02 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-11-07 17:10:02 -0800
commit25b6377007ebe1c3ede773fd6979f613386db000 (patch)
tree0367c3da469459a56974a2530a4038fd9029d640 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parenteaec7c9892bd565ffc7dcd32515b157011ca2323 (diff)
parent9ccde17d46554dbb2757c427f2cdf67688701f96 (diff)
Merge tag 'drm-next-2023-11-07' of git://anongit.freedesktop.org/drm/drm
Pull more drm updates from Dave Airlie: "Geert pointed out I missed the renesas reworks in my main pull, so this pull contains the renesas next work for atomic conversion and DT support. It also contains a bunch of amdgpu and some small ssd13xx fixes. renesas: - atomic conversion - DT support ssd13xx: - dt binding fix for ssd132x - Initialize ssd130x crtc_state to NULL. amdgpu: - Fix RAS support check - RAS fixes - MES fixes - SMU13 fixes - Contiguous memory allocation fix - BACO fixes - GPU reset fixes - Min power limit fixes - GFX11 fixes - USB4/TB hotplug fixes - ARM regression fix - GFX9.4.3 fixes - KASAN/KCSAN stack size check fixes - SR-IOV fixes - SMU14 fixes - PSP13 fixes - Display blend fixes - Flexible array size fixes amdkfd: - GPUVM fix radeon: - Flexible array size fixes" * tag 'drm-next-2023-11-07' of git://anongit.freedesktop.org/drm/drm: (83 commits) drm/amd/display: Enable fast update on blendTF change drm/amd/display: Fix blend LUT programming drm/amd/display: Program plane color setting correctly drm/amdgpu: Query and report boot status drm/amdgpu: Add psp v13 function to query boot status drm/amd/swsmu: remove fw version check in sw_init. drm/amd/swsmu: update smu v14_0_0 driver if and metrics table drm/amdgpu: Add C2PMSG_109/126 reg field shift/masks drm/amdgpu: Optimize the asic type fix code drm/amdgpu: fix GRBM read timeout when do mes_self_test drm/amdgpu: check recovery status of xgmi hive in ras_reset_error_count drm/amd/pm: only check sriov vf flag once when creating hwmon sysfs drm/amdgpu: Attach eviction fence on alloc drm/amdkfd: Improve amdgpu_vm_handle_moved drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2 drm/amd/display: Avoid NULL dereference of timing generator drm/amdkfd: Update cache info for GFX 9.4.3 drm/amdkfd: Populate cache info for GFX 9.4.3 drm/amdgpu: don't put MQDs in VRAM on ARM | ARM64 drm/amdgpu/smu13: drop compute workload workaround ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c19
1 files changed, 14 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 303fbb6a48b6..b7fe5951b166 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1222,6 +1222,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ struct amdgpu_hive_info *hive;
+ int hive_ras_recovery = 0;
if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1229,15 +1231,22 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
return -EOPNOTSUPP;
}
- /* skip ras error reset in gpu reset */
- if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) &&
- mca_funcs && mca_funcs->mca_set_debug_mode)
- return -EOPNOTSUPP;
-
if (!amdgpu_ras_is_supported(adev, block) ||
!amdgpu_ras_get_mca_debug_mode(adev))
return -EOPNOTSUPP;
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ /* skip ras error reset in gpu reset */
+ if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
+ hive_ras_recovery) &&
+ mca_funcs && mca_funcs->mca_set_debug_mode)
+ return -EOPNOTSUPP;
+
if (block_obj->hw_ops->reset_ras_error_count)
block_obj->hw_ops->reset_ras_error_count(adev);