From 3f1f576a195aa266813cbd4ca70291deb61e0129 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:38 +0100 Subject: x86/microcode: Propagate return value from updating functions ... so that callers can know when microcode was updated and act accordingly. Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/core.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65f98a2..6fdaf7cf3182 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu) } struct apply_microcode_ctx { - int err; + enum ucode_state err; }; static void apply_microcode_local(void *arg) @@ -489,31 +489,29 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static int reload_for_cpu(int cpu) +static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - int err = 0; if (!uci->valid) - return err; + return UCODE_OK; ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; + if (ustate != UCODE_OK) + return ustate; + + return apply_microcode_on_target(cpu); } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + enum ucode_state tmp_ret = UCODE_OK; unsigned long val; + ssize_t ret = 0; int cpu; - ssize_t ret = 0, tmp_ret; ret = kstrtoul(buf, 0, &val); if (ret) @@ -526,15 +524,18 @@ static ssize_t reload_store(struct device *dev, mutex_lock(µcode_mutex); for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) + if (tmp_ret > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; + /* set retval for the first encountered reload error */ + if (!ret) + ret = -EINVAL; + } } - if (!ret) + + if (!ret && tmp_ret == UCODE_UPDATED) perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.2.3-70-g09d2 From 1008c52c09dcb23d93f8e0ea83a6246265d2cce0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:39 +0100 Subject: x86/CPU: Add a microcode loader callback Add a callback function which the microcode loader calls when microcode has been updated to a newer revision. Do the callback only when no error was encountered during loading. Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 10 ++++++++++ arch/x86/kernel/cpu/microcode/core.c | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1bd9ed87606f..b0ccd4847a58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -977,4 +977,5 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); +void microcode_check(void); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 824aee0117bb..84f1cd88608b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1749,3 +1749,13 @@ static int __init init_cpu_syscore(void) return 0; } core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ + perf_check_microcode(); +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6fdaf7cf3182..aa1b9a422f2b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -509,6 +509,7 @@ static ssize_t reload_store(struct device *dev, const char *buf, size_t size) { enum ucode_state tmp_ret = UCODE_OK; + bool do_callback = false; unsigned long val; ssize_t ret = 0; int cpu; @@ -531,10 +532,13 @@ static ssize_t reload_store(struct device *dev, if (!ret) ret = -EINVAL; } + + if (tmp_ret == UCODE_UPDATED) + do_callback = true; } - if (!ret && tmp_ret == UCODE_UPDATED) - perf_check_microcode(); + if (!ret && do_callback) + microcode_check(); mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.2.3-70-g09d2 From 854857f5944c59a881ff607b37ed9ed41d031a3b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 28 Feb 2018 11:28:40 +0100 Subject: x86/microcode: Get rid of struct apply_microcode_ctx It is a useless remnant from earlier times. Use the ucode_state enum directly. No functional change. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Cc: Arjan Van De Ven Link: https://lkml.kernel.org/r/20180228102846.13447-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index aa1b9a422f2b..63370651e376 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -373,26 +373,23 @@ static int collect_cpu_info(int cpu) return ret; } -struct apply_microcode_ctx { - enum ucode_state err; -}; - static void apply_microcode_local(void *arg) { - struct apply_microcode_ctx *ctx = arg; + enum ucode_state *err = arg; - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); + *err = microcode_ops->apply_microcode(smp_processor_id()); } static int apply_microcode_on_target(int cpu) { - struct apply_microcode_ctx ctx = { .err = 0 }; + enum ucode_state err; int ret; - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); - if (!ret) - ret = ctx.err; - + ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1); + if (!ret) { + if (err == UCODE_ERROR) + ret = 1; + } return ret; } -- cgit v1.2.3-70-g09d2 From 30ec26da9967d0d785abc24073129a34c3211777 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 28 Feb 2018 11:28:43 +0100 Subject: x86/microcode: Do not upload microcode if CPUs are offline Avoid loading microcode if any of the CPUs are offline, and issue a warning. Having different microcode revisions on the system at any time is outright dangerous. [ Borislav: Massage changelog. ] Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Reviewed-by: Tom Lendacky Cc: Arjan Van De Ven Link: http://lkml.kernel.org/r/1519352533-15992-4-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-5-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 63370651e376..fa32cb3dcca5 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -486,6 +486,16 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; +static int check_online_cpus(void) +{ + if (num_online_cpus() == num_present_cpus()) + return 0; + + pr_err("Not all CPUs online, aborting microcode update.\n"); + + return -EINVAL; +} + static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -519,7 +529,13 @@ static ssize_t reload_store(struct device *dev, return size; get_online_cpus(); + + ret = check_online_cpus(); + if (ret) + goto put; + mutex_lock(µcode_mutex); + for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); if (tmp_ret > UCODE_NFOUND) { @@ -538,6 +554,8 @@ static ssize_t reload_store(struct device *dev, microcode_check(); mutex_unlock(µcode_mutex); + +put: put_online_cpus(); if (!ret) -- cgit v1.2.3-70-g09d2 From cfb52a5a09c8ae3a1dafb44ce549fde5b69e8117 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 28 Feb 2018 11:28:45 +0100 Subject: x86/microcode: Request microcode on the BSP ... so that any newer version can land in the cache and can later be fished out by the application functions. Do that before grabbing the hotplug lock. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Reviewed-by: Tom Lendacky Cc: Arjan Van De Ven Link: https://lkml.kernel.org/r/20180228102846.13447-7-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index fa32cb3dcca5..5dd157d48606 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -499,15 +499,10 @@ static int check_online_cpus(void) static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; if (!uci->valid) return UCODE_OK; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate != UCODE_OK) - return ustate; - return apply_microcode_on_target(cpu); } @@ -515,11 +510,11 @@ static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + int cpu, bsp = boot_cpu_data.cpu_index; enum ucode_state tmp_ret = UCODE_OK; bool do_callback = false; unsigned long val; ssize_t ret = 0; - int cpu; ret = kstrtoul(buf, 0, &val); if (ret) @@ -528,6 +523,10 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; + tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); + if (tmp_ret != UCODE_OK) + return size; + get_online_cpus(); ret = check_online_cpus(); -- cgit v1.2.3-70-g09d2 From a5321aec6412b20b5ad15db2d6b916c05349dbff Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 28 Feb 2018 11:28:46 +0100 Subject: x86/microcode: Synchronize late microcode loading Original idea by Ashok, completely rewritten by Borislav. Before you read any further: the early loading method is still the preferred one and you should always do that. The following patch is improving the late loading mechanism for long running jobs and cloud use cases. Gather all cores and serialize the microcode update on them by doing it one-by-one to make the late update process as reliable as possible and avoid potential issues caused by the microcode update. [ Borislav: Rewrite completely. ] Co-developed-by: Borislav Petkov Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Reviewed-by: Tom Lendacky Cc: Arjan Van De Ven Link: https://lkml.kernel.org/r/20180228102846.13447-8-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 118 +++++++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5dd157d48606..70ecbc8099c9 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -22,13 +22,16 @@ #define pr_fmt(fmt) "microcode: " fmt #include +#include #include #include #include #include #include +#include #include #include +#include #include #include @@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache); */ static DEFINE_MUTEX(microcode_mutex); +/* + * Serialize late loading so that CPUs get updated one-by-one. + */ +static DEFINE_SPINLOCK(update_lock); + struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -486,6 +494,19 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; +/* + * Late loading dance. Why the heavy-handed stomp_machine effort? + * + * - HT siblings must be idle and not execute other code while the other sibling + * is loading microcode in order to avoid any negative interactions caused by + * the loading. + * + * - In addition, microcode update on the cores must be serialized until this + * requirement can be relaxed in the future. Right now, this is conservative + * and good. + */ +#define SPINUNIT 100 /* 100 nsec */ + static int check_online_cpus(void) { if (num_online_cpus() == num_present_cpus()) @@ -496,23 +517,85 @@ static int check_online_cpus(void) return -EINVAL; } -static enum ucode_state reload_for_cpu(int cpu) +static atomic_t late_cpus; + +/* + * Returns: + * < 0 - on error + * 0 - no update done + * 1 - microcode was updated + */ +static int __reload_late(void *info) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + unsigned int timeout = NSEC_PER_SEC; + int all_cpus = num_online_cpus(); + int cpu = smp_processor_id(); + enum ucode_state err; + int ret = 0; - if (!uci->valid) - return UCODE_OK; + atomic_dec(&late_cpus); + + /* + * Wait for all CPUs to arrive. A load will not be attempted unless all + * CPUs show up. + * */ + while (atomic_read(&late_cpus)) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + atomic_read(&late_cpus)); + return -1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + + spin_lock(&update_lock); + apply_microcode_local(&err); + spin_unlock(&update_lock); + + if (err > UCODE_NFOUND) { + pr_warn("Error reloading microcode on CPU %d\n", cpu); + ret = -1; + } else if (err == UCODE_UPDATED) { + ret = 1; + } - return apply_microcode_on_target(cpu); + atomic_inc(&late_cpus); + + while (atomic_read(&late_cpus) != all_cpus) + cpu_relax(); + + return ret; +} + +/* + * Reload microcode late on all CPUs. Wait for a sec until they + * all gather together. + */ +static int microcode_reload_late(void) +{ + int ret; + + atomic_set(&late_cpus, num_online_cpus()); + + ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); + if (ret < 0) + return ret; + else if (ret > 0) + microcode_check(); + + return ret; } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { - int cpu, bsp = boot_cpu_data.cpu_index; enum ucode_state tmp_ret = UCODE_OK; - bool do_callback = false; + int bsp = boot_cpu_data.cpu_index; unsigned long val; ssize_t ret = 0; @@ -534,30 +617,13 @@ static ssize_t reload_store(struct device *dev, goto put; mutex_lock(µcode_mutex); - - for_each_online_cpu(cpu) { - tmp_ret = reload_for_cpu(cpu); - if (tmp_ret > UCODE_NFOUND) { - pr_warn("Error reloading microcode on CPU %d\n", cpu); - - /* set retval for the first encountered reload error */ - if (!ret) - ret = -EINVAL; - } - - if (tmp_ret == UCODE_UPDATED) - do_callback = true; - } - - if (!ret && do_callback) - microcode_check(); - + ret = microcode_reload_late(); mutex_unlock(µcode_mutex); put: put_online_cpus(); - if (!ret) + if (ret >= 0) ret = size; return ret; -- cgit v1.2.3-70-g09d2 From 2613f36ed965d0e5a595a1d931fd3b480e82d6fd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 14 Mar 2018 19:36:14 +0100 Subject: x86/microcode: Attempt late loading only when new microcode is present Return UCODE_NEW from the scanning functions to denote that new microcode was found and only then attempt the expensive synchronization dance. Reported-by: Emanuel Czirai Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Emanuel Czirai Tested-by: Ashok Raj Tested-by: Tom Lendacky Link: https://lkml.kernel.org/r/20180314183615.17629-1-bp@alien8.de --- arch/x86/include/asm/microcode.h | 1 + arch/x86/kernel/cpu/microcode/amd.c | 34 +++++++++++++++++++++------------- arch/x86/kernel/cpu/microcode/core.c | 8 +++----- arch/x86/kernel/cpu/microcode/intel.c | 4 +++- 4 files changed, 28 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 7fb1047d61c7..6cf0e4cb7b97 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -39,6 +39,7 @@ struct device; enum ucode_state { UCODE_OK = 0, + UCODE_NEW, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR, diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a998e1a7d46f..48179928ff38 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) return -EINVAL; ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret != UCODE_OK) + if (ret > UCODE_UPDATED) return -EINVAL; return 0; @@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, static enum ucode_state load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { + struct ucode_patch *p; enum ucode_state ret; /* free old equiv table */ free_equiv_cpu_table(); ret = __load_microcode_amd(family, data, size); - - if (ret != UCODE_OK) + if (ret != UCODE_OK) { cleanup(); + return ret; + } -#ifdef CONFIG_X86_32 - /* save BSP's matching patch for early load */ - if (save) { - struct ucode_patch *p = find_patch(0); - if (p) { - memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), - PATCH_MAX_SIZE)); - } + p = find_patch(0); + if (!p) { + return ret; + } else { + if (boot_cpu_data.microcode == p->patch_id) + return ret; + + ret = UCODE_NEW; } -#endif + + /* save BSP's matching patch for early load */ + if (!save) + return ret; + + memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); + memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); + return ret; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 70ecbc8099c9..9f0fe5bb450d 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -607,7 +607,7 @@ static ssize_t reload_store(struct device *dev, return size; tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); - if (tmp_ret != UCODE_OK) + if (tmp_ret != UCODE_NEW) return size; get_online_cpus(); @@ -691,10 +691,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, - refresh_fw); - - if (ustate == UCODE_OK) { + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, refresh_fw); + if (ustate == UCODE_NEW) { pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 2aded9db1d42..32b8e5724f96 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -862,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, unsigned int leftover = size; unsigned int curr_mc_size = 0, new_mc_size = 0; unsigned int csig, cpf; + enum ucode_state ret = UCODE_OK; while (leftover) { struct microcode_header_intel mc_header; @@ -903,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, new_mc = mc; new_mc_size = mc_size; mc = NULL; /* trigger new vmalloc */ + ret = UCODE_NEW; } ucode_ptr += mc_size; @@ -932,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); - return UCODE_OK; + return ret; } static int get_ucode_fw(void *to, const void *from, size_t n) -- cgit v1.2.3-70-g09d2 From bb8c13d61a629276a162c1d2b1a20a815cbcfbb7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 14 Mar 2018 19:36:15 +0100 Subject: x86/microcode: Fix CPU synchronization routine Emanuel reported an issue with a hang during microcode update because my dumb idea to use one atomic synchronization variable for both rendezvous - before and after update - was simply bollocks: microcode: microcode_reload_late: late_cpus: 4 microcode: __reload_late: cpu 2 entered microcode: __reload_late: cpu 1 entered microcode: __reload_late: cpu 3 entered microcode: __reload_late: cpu 0 entered microcode: __reload_late: cpu 1 left microcode: Timeout while waiting for CPUs rendezvous, remaining: 1 CPU1 above would finish, leave and the others will still spin waiting for it to join. So do two synchronization atomics instead, which makes the code a lot more straightforward. Also, since the update is serialized and it also takes quite some time per microcode engine, increase the exit timeout by the number of CPUs on the system. That's ok because the moment all CPUs are done, that timeout will be cut short. Furthermore, panic when some of the CPUs timeout when returning from a microcode update: we can't allow a system with not all cores updated. Also, as an optimization, do not do the exit sync if microcode wasn't updated. Reported-by: Emanuel Czirai Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Emanuel Czirai Tested-by: Ashok Raj Tested-by: Tom Lendacky Link: https://lkml.kernel.org/r/20180314183615.17629-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 68 ++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel/cpu/microcode/core.c') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 9f0fe5bb450d..10c4fc2c91f8 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -517,7 +517,29 @@ static int check_online_cpus(void) return -EINVAL; } -static atomic_t late_cpus; +static atomic_t late_cpus_in; +static atomic_t late_cpus_out; + +static int __wait_for_cpus(atomic_t *t, long long timeout) +{ + int all_cpus = num_online_cpus(); + + atomic_inc(t); + + while (atomic_read(t) < all_cpus) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + all_cpus - atomic_read(t)); + return 1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + return 0; +} /* * Returns: @@ -527,30 +549,16 @@ static atomic_t late_cpus; */ static int __reload_late(void *info) { - unsigned int timeout = NSEC_PER_SEC; - int all_cpus = num_online_cpus(); int cpu = smp_processor_id(); enum ucode_state err; int ret = 0; - atomic_dec(&late_cpus); - /* * Wait for all CPUs to arrive. A load will not be attempted unless all * CPUs show up. * */ - while (atomic_read(&late_cpus)) { - if (timeout < SPINUNIT) { - pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", - atomic_read(&late_cpus)); - return -1; - } - - ndelay(SPINUNIT); - timeout -= SPINUNIT; - - touch_nmi_watchdog(); - } + if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) + return -1; spin_lock(&update_lock); apply_microcode_local(&err); @@ -558,15 +566,22 @@ static int __reload_late(void *info) if (err > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - ret = -1; - } else if (err == UCODE_UPDATED) { + return -1; + /* siblings return UCODE_OK because their engine got updated already */ + } else if (err == UCODE_UPDATED || err == UCODE_OK) { ret = 1; + } else { + return ret; } - atomic_inc(&late_cpus); - - while (atomic_read(&late_cpus) != all_cpus) - cpu_relax(); + /* + * Increase the wait timeout to a safe value here since we're + * serializing the microcode update and that could take a while on a + * large number of CPUs. And that is fine as the *actual* timeout will + * be determined by the last CPU finished updating and thus cut short. + */ + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) + panic("Timeout during microcode update!\n"); return ret; } @@ -579,12 +594,11 @@ static int microcode_reload_late(void) { int ret; - atomic_set(&late_cpus, num_online_cpus()); + atomic_set(&late_cpus_in, 0); + atomic_set(&late_cpus_out, 0); ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); - if (ret < 0) - return ret; - else if (ret > 0) + if (ret > 0) microcode_check(); return ret; -- cgit v1.2.3-70-g09d2