From db47d5f856467ce0dd3af7e20a33df3d901266df Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 25 Jan 2017 20:30:29 +0100 Subject: x86/nmi, EDAC: Get rid of DRAM error reporting thru PCI SERR NMI Apparently, some machines used to report DRAM errors through a PCI SERR NMI. This is why we have a call into EDAC in the NMI handler. See c0d121720220 ("drivers/edac: add new nmi rescan"). From looking at the patch above, that's two drivers: e752x_edac.c and e7xxx_edac.c. Now, I wanna say those are old machines which are probably decommissioned already. Tony says that "[t]he newest CPU supported by either of those drivers is the Xeon E7520 (a.k.a. "Nehalem") released in Q1'2010. Possibly some folks are still using these ... but people that hold onto h/w for 7 years generally cling to old s/w too ... so I'd guess it unlikely that we will get complaints for breaking these in upstream." So even if there is a small number still in use, we did load EDAC with edac_op_state == EDAC_OPSTATE_POLL by default (we still do, in fact) which means a default EDAC setup without any parameters supplied on the command line or otherwise would never even log the error in the NMI handler because we're polling by default: inline int edac_handler_set(void) { if (edac_op_state == EDAC_OPSTATE_POLL) return 0; return atomic_read(&edac_handlers); } So, long story short, I'd like to get rid of that nastiness called edac_stub.c and confine all the EDAC drivers solely to drivers/edac/. If we ever have to do stuff like that again, it should be notifiers we're using and not some insanity like this one. 
Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: Tony Luck --- include/linux/edac.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index 5b6adf964248..bf8daabf3d51 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -31,8 +31,6 @@ extern int edac_op_state; extern int edac_err_assert; extern atomic_t edac_handlers; -extern int edac_handler_set(void); -extern void edac_atomic_assert_error(void); extern struct bus_type *edac_get_sysfs_subsys(void); enum { -- cgit v1.2.3-70-g09d2 From 97bb6c17ad5a0892beb45070dfe8c7d6d0e5326e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 16:49:59 +0100 Subject: EDAC: Get rid of edac_handlers Use mc_devices list instead to check whether we have EDAC driver instances successfully registered with EDAC core. Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 6 ++---- drivers/edac/edac_stub.c | 3 --- include/linux/edac.h | 1 - 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index e5573c56b15e..824d31193b69 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -601,7 +601,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); - atomic_inc(&edac_handlers); return 0; fail0: @@ -619,7 +618,6 @@ fail1: static int del_mc_from_global_list(struct mem_ctl_info *mci) { - int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -628,7 +626,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) synchronize_rcu(); INIT_LIST_HEAD(&mci->link); - return handlers; + return list_empty(&mc_devices); } struct mem_ctl_info *edac_mc_find(int idx) @@ -763,7 +761,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) /* mark MCI offline: */ mci->op_state = OP_OFFLINE; - if 
(!del_mc_from_global_list(mci)) + if (del_mc_from_global_list(mci)) edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index d1389e4b7989..cfb250fa38ce 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -20,9 +20,6 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); -atomic_t edac_handlers = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(edac_handlers); - int edac_err_assert = 0; EXPORT_SYMBOL_GPL(edac_err_assert); diff --git a/include/linux/edac.h b/include/linux/edac.h index bf8daabf3d51..9fd6fe53ab2a 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -29,7 +29,6 @@ struct device; extern int edac_op_state; extern int edac_err_assert; -extern atomic_t edac_handlers; extern struct bus_type *edac_get_sysfs_subsys(void); -- cgit v1.2.3-70-g09d2 From d3116a0837261405e0febb8043fe7040c8ebccb4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 18:25:11 +0100 Subject: EDAC: Remove edac_err_assert ... and the glue around it. It is not needed anymore. 
Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 18 +----------------- drivers/edac/edac_stub.c | 3 --- include/linux/edac.h | 1 - 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 824d31193b69..482b6aea1ce7 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -504,22 +504,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) } EXPORT_SYMBOL_GPL(find_mci_by_dev); -/* - * handler for EDAC to check if NMI type handler has asserted interrupt - */ -static int edac_mc_assert_error_check_and_clear(void) -{ - int old_state; - - if (edac_op_state == EDAC_OPSTATE_POLL) - return 1; - - old_state = edac_err_assert; - edac_err_assert = 0; - - return old_state; -} - /* * edac_mc_workq_function * performs the operation scheduled by a workq request @@ -536,7 +520,7 @@ static void edac_mc_workq_function(struct work_struct *work_req) return; } - if (edac_mc_assert_error_check_and_clear()) + if (edac_op_state == EDAC_OPSTATE_POLL) mci->edac_check(mci); mutex_unlock(&mem_ctls_mutex); diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index cfb250fa38ce..f02d21d8130f 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -20,9 +20,6 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); -int edac_err_assert = 0; -EXPORT_SYMBOL_GPL(edac_err_assert); - int edac_report_status = EDAC_REPORTING_ENABLED; EXPORT_SYMBOL_GPL(edac_report_status); diff --git a/include/linux/edac.h b/include/linux/edac.h index 9fd6fe53ab2a..c55e93975079 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -28,7 +28,6 @@ struct device; #define EDAC_OPSTATE_INT 2 extern int edac_op_state; -extern int edac_err_assert; extern struct bus_type *edac_get_sysfs_subsys(void); -- cgit v1.2.3-70-g09d2 From fee27d7d97886515a60cce38b4152b7f5b5a21fc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Feb 2017 17:42:03 +0100 Subject: 
EDAC: Delete edac_stub.c Move the remaining functionality to edac_mc.c. Convert "edac_report=" to a module parameter. Signed-off-by: Borislav Petkov --- drivers/edac/Makefile | 2 +- drivers/edac/edac_mc.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/edac/edac_stub.c | 37 ----------------------------- include/linux/edac.h | 26 +++------------------ 4 files changed, 65 insertions(+), 61 deletions(-) delete mode 100644 drivers/edac/edac_stub.c (limited to 'include') diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index a8fb734cb28d..0fd9ffa63299 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,7 +6,7 @@ # GNU General Public License. # -obj-$(CONFIG_EDAC) := edac_stub.o edac_core.o +obj-$(CONFIG_EDAC) := edac_core.o edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o wq.o diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 735546ea6ebe..536b65aa6fac 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -43,6 +43,8 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); +static int edac_report = EDAC_REPORTING_ENABLED; + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -55,6 +57,65 @@ static void const *edac_mc_owner; static struct bus_type mc_bus[EDAC_MAX_MCS]; +int get_edac_report_status(void) +{ + return edac_report; +} +EXPORT_SYMBOL_GPL(get_edac_report_status); + +void set_edac_report_status(int new) +{ + if (new == EDAC_REPORTING_ENABLED || + new == EDAC_REPORTING_DISABLED || + new == EDAC_REPORTING_FORCE) + edac_report = new; +} +EXPORT_SYMBOL_GPL(set_edac_report_status); + +static int edac_report_set(const char *str, const struct kernel_param *kp) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + edac_report = EDAC_REPORTING_ENABLED; + else if (!strncmp(str, "off", 3)) + edac_report = EDAC_REPORTING_DISABLED; + else if 
(!strncmp(str, "force", 5)) + edac_report = EDAC_REPORTING_FORCE; + + return 0; +} + +static int edac_report_get(char *buffer, const struct kernel_param *kp) +{ + int ret = 0; + + switch (edac_report) { + case EDAC_REPORTING_ENABLED: + ret = sprintf(buffer, "on"); + break; + case EDAC_REPORTING_DISABLED: + ret = sprintf(buffer, "off"); + break; + case EDAC_REPORTING_FORCE: + ret = sprintf(buffer, "force"); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct kernel_param_ops edac_report_ops = { + .set = edac_report_set, + .get = edac_report_get, +}; + +module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c deleted file mode 100644 index 6aacc569401e..000000000000 --- a/drivers/edac/edac_stub.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * common EDAC components that must be in kernel - * - * Author: Dave Jiang - * - * 2007 (c) MontaVista Software, Inc. - * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- * - */ -#include -#include -#include -#include - -int edac_report_status = EDAC_REPORTING_ENABLED; -EXPORT_SYMBOL_GPL(edac_report_status); - -static int __init __maybe_unused edac_report_setup(char *str) -{ - if (!str) - return -EINVAL; - - if (!strncmp(str, "on", 2)) - set_edac_report_status(EDAC_REPORTING_ENABLED); - else if (!strncmp(str, "off", 3)) - set_edac_report_status(EDAC_REPORTING_DISABLED); - else if (!strncmp(str, "force", 5)) - set_edac_report_status(EDAC_REPORTING_FORCE); - - return 0; -} -__setup("edac_report=", edac_report_setup); diff --git a/include/linux/edac.h b/include/linux/edac.h index c55e93975079..faf87e1eca21 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -29,7 +29,9 @@ struct device; extern int edac_op_state; -extern struct bus_type *edac_get_sysfs_subsys(void); +struct bus_type *edac_get_sysfs_subsys(void); +int get_edac_report_status(void); +void set_edac_report_status(int new); enum { EDAC_REPORTING_ENABLED, @@ -37,28 +39,6 @@ enum { EDAC_REPORTING_FORCE }; -extern int edac_report_status; -#ifdef CONFIG_EDAC -static inline int get_edac_report_status(void) -{ - return edac_report_status; -} - -static inline void set_edac_report_status(int new) -{ - edac_report_status = new; -} -#else -static inline int get_edac_report_status(void) -{ - return EDAC_REPORTING_DISABLED; -} - -static inline void set_edac_report_status(int new) -{ -} -#endif - static inline void opstate_init(void) { switch (edac_op_state) { -- cgit v1.2.3-70-g09d2 From bffc7dece92edd0b6445b76a378e2fa9e324c7ed Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Feb 2017 18:10:14 +0100 Subject: EDAC: Rename report status accessors Change them to have the edac_ prefix. No functionality change. 
Signed-off-by: Borislav Petkov --- drivers/acpi/acpi_extlog.c | 8 ++++---- drivers/edac/edac_mc.c | 8 ++++---- drivers/edac/pnd2_edac.c | 2 +- drivers/edac/sb_edac.c | 4 ++-- drivers/edac/skx_edac.c | 2 +- include/linux/edac.h | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a15270a806fc..502ea4dc2080 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -229,7 +229,7 @@ static int __init extlog_init(void) if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) return -ENODEV; - if (get_edac_report_status() == EDAC_REPORTING_FORCE) { + if (edac_get_report_status() == EDAC_REPORTING_FORCE) { pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); return -EPERM; } @@ -285,8 +285,8 @@ static int __init extlog_init(void) * eMCA event report method has higher priority than EDAC method, * unless EDAC event report method is mandatory. */ - old_edac_report_status = get_edac_report_status(); - set_edac_report_status(EDAC_REPORTING_DISABLED); + old_edac_report_status = edac_get_report_status(); + edac_set_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,7 +308,7 @@ err: static void __exit extlog_exit(void) { - set_edac_report_status(old_edac_report_status); + edac_set_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 536b65aa6fac..480072139b7a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -57,20 +57,20 @@ static void const *edac_mc_owner; static struct bus_type mc_bus[EDAC_MAX_MCS]; -int get_edac_report_status(void) +int edac_get_report_status(void) { 
return edac_report; } -EXPORT_SYMBOL_GPL(get_edac_report_status); +EXPORT_SYMBOL_GPL(edac_get_report_status); -void set_edac_report_status(int new) +void edac_set_report_status(int new) { if (new == EDAC_REPORTING_ENABLED || new == EDAC_REPORTING_DISABLED || new == EDAC_REPORTING_FORCE) edac_report = new; } -EXPORT_SYMBOL_GPL(set_edac_report_status); +EXPORT_SYMBOL_GPL(edac_set_report_status); static int edac_report_set(const char *str, const struct kernel_param *kp) { diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 928e0dba41fc..1cad5a9af8d0 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1349,7 +1349,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo struct dram_addr daddr; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = pnd2_mci; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index a65ea44e3b0b..ea21cb651b3c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct sbridge_pvt *pvt; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; mci = get_mci_for_node_id(mce->socketid); @@ -3441,7 +3441,7 @@ static int __init sbridge_init(void) if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 1159dba4671f..64bef6c9cfb4 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, 
struct mem_ctl_info *mci; char *type; - if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; /* ignore unless this is memory related with an address */ diff --git a/include/linux/edac.h b/include/linux/edac.h index faf87e1eca21..8ae0f45fafd6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -30,8 +30,8 @@ struct device; extern int edac_op_state; struct bus_type *edac_get_sysfs_subsys(void); -int get_edac_report_status(void); -void set_edac_report_status(int new); +int edac_get_report_status(void); +void edac_set_report_status(int new); enum { EDAC_REPORTING_ENABLED, -- cgit v1.2.3-70-g09d2