summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c19
3 files changed, 35 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 37d779b8e4a6..a3bc00577a7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -22,6 +22,29 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_nbio_ras *ras;
+
+ if (!adev->nbio.ras)
+ return 0;
+
+ ras = adev->nbio.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register pcie_bif ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "pcie_bif");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->nbio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index a240336bbc6b..c686ff4bcc39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -106,5 +106,6 @@ struct amdgpu_nbio {
struct amdgpu_nbio_ras *ras;
};
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 63dfcc98152d..b0d050ffc200 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2554,21 +2554,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* initialize nbio ras function ahead of any other
* ras functions so hardware fatal error interrupt
* can be enabled as early as possible */
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- case CHIP_ALDEBARAN:
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ if (!adev->gmc.xgmi.connected_to_cpu)
adev->nbio.ras = &nbio_v7_4_ras;
- amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block);
- adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm;
- }
break;
default:
/* nbio ras is not available */
break;
}
+ /* nbio ras block needs to be enabled ahead of other ras blocks
+ * to handle fatal error */
+ r = amdgpu_nbio_ras_sw_init(adev);
+ if (r)
+ return r;
+
if (adev->nbio.ras &&
adev->nbio.ras->init_ras_controller_interrupt) {
r = adev->nbio.ras->init_ras_controller_interrupt(adev);