summaryrefslogtreecommitdiff
path: root/drivers/misc/uacce
diff options
context:
space:
mode:
authorKai Ye <yekai13@huawei.com>2022-11-19 07:48:15 +0000
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2023-01-20 12:06:26 +0100
commite3e289fbc0b520cf469469e8cdba84a50424eb65 (patch)
treedacb4275ddd034fc5c657f6bc4c992f6eba584f9 /drivers/misc/uacce
parent40bf2fcd85e5997b419e49f72d0df694b8274586 (diff)
uacce: supports device isolation feature
UACCE adds the hardware error isolation feature. To improve service reliability, some uacce devices that frequently encounter hardware errors are isolated. Therefore, this feature is added. Users can configure the hardware error threshold by 'isolate_strategy' sysfs node. The user space can get the device isolated state by 'isolate' sysfs node. If the number of device errors exceeds the configured error threshold, the device will be isolated. It means the uacce device is unavailable. Signed-off-by: Kai Ye <yekai13@huawei.com> Link: https://lore.kernel.org/r/20221119074817.12063-2-yekai13@huawei.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc/uacce')
-rw-r--r--drivers/misc/uacce/uacce.c50
1 files changed, 50 insertions, 0 deletions
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index 905eff1f840e..d3a217929a24 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -363,12 +363,52 @@ static ssize_t region_dus_size_show(struct device *dev,
uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT);
}
+static ssize_t isolate_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+
+ return sysfs_emit(buf, "%d\n", uacce->ops->get_isolate_state(uacce));
+}
+
+static ssize_t isolate_strategy_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+ u32 val;
+
+ val = uacce->ops->isolate_err_threshold_read(uacce);
+
+ return sysfs_emit(buf, "%u\n", val);
+}
+
+static ssize_t isolate_strategy_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct uacce_device *uacce = to_uacce_device(dev);
+ unsigned long val;
+ int ret;
+
+ if (kstrtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ if (val > UACCE_MAX_ERR_THRESHOLD)
+ return -EINVAL;
+
+ ret = uacce->ops->isolate_err_threshold_write(uacce, val);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
static DEVICE_ATTR_RO(api);
static DEVICE_ATTR_RO(flags);
static DEVICE_ATTR_RO(available_instances);
static DEVICE_ATTR_RO(algorithms);
static DEVICE_ATTR_RO(region_mmio_size);
static DEVICE_ATTR_RO(region_dus_size);
+static DEVICE_ATTR_RO(isolate);
+static DEVICE_ATTR_RW(isolate_strategy);
static struct attribute *uacce_dev_attrs[] = {
&dev_attr_api.attr,
@@ -377,6 +417,8 @@ static struct attribute *uacce_dev_attrs[] = {
&dev_attr_algorithms.attr,
&dev_attr_region_mmio_size.attr,
&dev_attr_region_dus_size.attr,
+ &dev_attr_isolate.attr,
+ &dev_attr_isolate_strategy.attr,
NULL,
};
@@ -392,6 +434,14 @@ static umode_t uacce_dev_is_visible(struct kobject *kobj,
(!uacce->qf_pg_num[UACCE_QFRT_DUS])))
return 0;
+ if (attr == &dev_attr_isolate_strategy.attr &&
+ (!uacce->ops->isolate_err_threshold_read &&
+ !uacce->ops->isolate_err_threshold_write))
+ return 0;
+
+ if (attr == &dev_attr_isolate.attr && !uacce->ops->get_isolate_state)
+ return 0;
+
return attr->mode;
}