diff options
author | Tomer Tayar <ttayar@habana.ai> | 2024-03-10 12:18:35 +0200 |
---|---|---|
committer | Ofir Bitton <obitton@habana.ai> | 2024-06-23 09:53:03 +0300 |
commit | c8c10dcacafce3b6fb0238479b5e28e92b0366da (patch) | |
tree | 40b6ca402577c9dc85282551fb538b21d0d010b4 /drivers/accel | |
parent | c754bcf9dd06e9b532633ede41e7c5146d132249 (diff) |
accel/habanalabs/gaudi2: assume hard-reset by FW upon MC SEI severe error
FW initiates a hard reset upon an MC SEI severe error.
Align the driver to expect this reset and avoid accessing the device
until the reset is done.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Diffstat (limited to 'drivers/accel')
-rw-r--r-- | drivers/accel/habanalabs/gaudi2/gaudi2.c | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 18cc7b773650..4791582d157c 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -10004,6 +10004,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; reset_required = true; + is_critical = eq_entry->sei_data.hdr.is_critical; } error_count++; break; @@ -10235,8 +10236,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_print_event(hdev, event_type, true, "No error cause for H/W event %u", event_type); - if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || - reset_required) { + if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) { if (reset_required || (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD)) reset_flags |= HL_DRV_RESET_HARD; |