summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c5
8 files changed, 56 insertions, 19 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ec79a5c2f500..59b8b26e2caf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1672,14 +1672,33 @@ int psp_ras_initialize(struct psp_context *psp)
}
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info)
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
{
struct ta_ras_shared_memory *ras_cmd;
+ struct amdgpu_device *adev = psp->adev;
int ret;
+ uint32_t dev_mask;
if (!psp->ras_context.context.initialized)
return -EINVAL;
+ switch (info->block_id) {
+ case TA_RAS_BLOCK__GFX:
+ dev_mask = GET_MASK(GC, instance_mask);
+ break;
+ case TA_RAS_BLOCK__SDMA:
+ dev_mask = GET_MASK(SDMA0, instance_mask);
+ break;
+ default:
+ dev_mask = instance_mask;
+ break;
+ }
+
+ /* reuse sub_block_index for backward compatibility */
+ dev_mask <<= AMDGPU_RAS_INST_SHIFT;
+ dev_mask &= AMDGPU_RAS_INST_MASK;
+ info->sub_block_index |= dev_mask;
+
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 0a409da749d1..d84323923a3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -486,7 +486,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
int psp_ras_trigger_error(struct psp_context *psp,
- struct ta_ras_trigger_error_input *info);
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
int psp_ras_terminate(struct psp_context *psp);
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 64f80e8cbd63..7ae08f168f99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -256,6 +256,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
int block_id;
uint32_t sub_block;
u64 address, value;
+ /* default value is 0 if the mask is not set by user */
+ u32 instance_mask = 0;
if (*pos)
return -EINVAL;
@@ -306,7 +308,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->op = op;
if (op == 2) {
- if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu %u",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
&sub_block, &address, &value) != 3 &&
sscanf(str, "%*s %*s %*s %u %llu %llu",
&sub_block, &address, &value) != 3)
@@ -314,6 +320,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->head.sub_block_index = sub_block;
data->inject.address = address;
data->inject.value = value;
+ data->inject.instance_mask = instance_mask;
}
} else {
if (size < sizeof(*data))
@@ -341,7 +348,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* sub_block_index: some IPs have subcomponets. say, GFX, sDMA.
* name: the name of IP.
*
- * inject has two more members than head, they are address, value.
+ * inject has three more members than head, they are address, value and mask.
* As their names indicate, inject operation will write the
* value to the address.
*
@@ -365,7 +372,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
*
* echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
* echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
- * echo "inject <block> <error> <sub-block> <address> <value> > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
*
* Where N, is the card which you want to affect.
*
@@ -382,13 +389,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
*
* The sub-block is a the sub-block index, pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
+ * The mask means instance mask, is optional, default value is 0x1.
*
* For instance,
*
* .. code-block:: bash
*
* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
- * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
*
* How to check the result of the operation?
@@ -1117,13 +1125,14 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
if (block_obj->hw_ops->ras_error_inject)
- ret = block_obj->hw_ops->ras_error_inject(adev, info);
+ ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
} else {
/* If defined special ras_error_inject(e.g: xgmi), implement special ras_error_inject */
if (block_obj->hw_ops->ras_error_inject)
- ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
+ ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
+ info->instance_mask);
else /*If not defined .ras_error_inject, use default ras_error_inject*/
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
}
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index e96333d0c269..bc43f7db17cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -32,6 +32,11 @@
struct amdgpu_iv_entry;
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
+/* position of instance value in sub_block_index of
+ * ta_ras_trigger_error_input, the sub block uses lower 12 bits
+ */
+#define AMDGPU_RAS_INST_MASK 0xfffff000
+#define AMDGPU_RAS_INST_SHIFT 0xc
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
@@ -508,6 +513,7 @@ struct ras_inject_if {
struct ras_common_if head;
uint64_t address;
uint64_t value;
+ uint32_t instance_mask;
};
struct ras_cure_if {
@@ -545,7 +551,8 @@ struct amdgpu_ras_block_object {
};
struct amdgpu_ras_block_hw_ops {
- int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if);
+ int (*ras_error_inject)(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask);
void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
void (*query_ras_error_status)(struct amdgpu_device *adev);
void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 439925477fb8..85ee1af963dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1014,7 +1014,8 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
}
/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if)
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
{
int ret = 0;
struct ta_ras_trigger_error_input *block_info =
@@ -1026,7 +1027,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *injec
if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
dev_warn(adev->dev, "Failed to disallow XGMI power down");
- ret = psp_ras_trigger_error(&adev->psp, block_info);
+ ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
if (amdgpu_ras_intr_triggered())
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index cc005e3bcd40..de8e70b3db75 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -770,7 +770,7 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if);
+ void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
@@ -6335,7 +6335,7 @@ static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
};
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if)
+ void *inject_if, uint32_t instance_mask)
{
struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
int ret;
@@ -6374,7 +6374,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
block_info.value = info->value;
mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
mutex_unlock(&adev->grbm_idx_mutex);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index c67e387a97f5..59abe162bbaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -971,7 +971,7 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
}
static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
- void *inject_if)
+ void *inject_if, uint32_t instance_mask)
{
struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
int ret;
@@ -987,7 +987,7 @@ static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
block_info.value = info->value;
mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
mutex_unlock(&adev->grbm_idx_mutex);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index ec7c049c5952..4906affa6f8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1699,7 +1699,8 @@ static void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev)
gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL);
}
-static int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+static int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
{
struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
int ret;
@@ -1715,7 +1716,7 @@ static int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_
block_info.value = info->value;
mutex_lock(&adev->grbm_idx_mutex);
- ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
mutex_unlock(&adev->grbm_idx_mutex);
return ret;