diff options
-rw-r--r-- | Documentation/ABI/testing/debugfs-driver-habanalabs | 1 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/debugfs.c | 6 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 2 |
5 files changed, 13 insertions, 0 deletions
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index 783001a574b3..bcf6915987e4 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -222,6 +222,7 @@ KernelVersion: 5.6 Contact: ogabbay@kernel.org Description: Sets the stop-on_error option for the device engines. Value of "0" is for disable, otherwise enable. + Relevant only for GOYA and GAUDI. What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked Date: Sep 2021 diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index e3ee5f45d20c..9f0aaf0ef43b 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -1071,6 +1071,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf, char tmp_buf[200]; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (*ppos) return 0; @@ -1089,6 +1092,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, u32 value; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't change stop on error during reset\n"); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index b06e2b0812b6..93116fe71ef6 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -561,6 +561,7 @@ struct hl_hints_range { * use-case of doing soft-reset in training (due * to the fact that training runs on multiple * devices) + * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -644,6 +645,7 @@ struct asic_fixed_properties { u8 use_get_power_for_reset_history; u8 supports_soft_reset; u8 allow_inference_soft_reset; + u8 configurable_stop_on_err; }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f2242aa3baa2..61aa6dce6dde 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -669,6 +669,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + return 0; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3785fb33260d..c8143b6616af 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -483,6 +483,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + return 0; } |