From 1751342095f0d2b36fa8114d8e12c5688c455ac4 Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw@amazon.co.uk> Date: Sat, 10 Feb 2018 23:39:22 +0000 Subject: x86/speculation: Update Speculation Control microcode blacklist Intel have retroactively blessed the 0xc2 microcode on Skylake mobile and desktop parts, and the Gemini Lake 0x22 microcode is apparently fine too. We blacklisted the latter purely because it was present with all the other problematic ones in the 2018-01-08 release, but now it's explicitly listed as OK. We still list 0x84 for the various Kaby Lake / Coffee Lake parts, as that appeared in one version of the blacklist and then reverted to 0x80 again. We can change it if 0x84 is actually announced to be safe. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: arjan.van.de.ven@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/intel.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 319bf989fad1..f73b8148dd55 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -123,8 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, @@ -136,8 +134,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, - /* Updated in the 20180108 release; blacklist until we know otherwise */ - { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, /* Observed in the wild */ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, -- cgit v1.2.3-70-g09d2 From c80c5ec1b2fa8d3675fc2a6807a64771ea156698 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Sat, 10 Feb 2018 15:53:14 +0100 Subject: x86/MCE: Fix build warning introduced by "x86: do not use print_symbol()" The following commit: 7b6061627eb8 ("x86: do not use print_symbol()") ... introduced a new build warning on 32-bit x86: arch/x86/kernel/cpu/mcheck/mce.c:237:21: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] pr_cont("{%pS}", (void *)m->ip); ^ Fix the type mismatch between the 'void *' expected by %pS and the mce->ip field which is u64 by casting to long. Signed-off-by: Borislav Petkov <bp@suse.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: linux-kernel@vger.kernel.org Fixes: 7b6061627eb8 ("x86: do not use print_symbol()") Link: http://lkml.kernel.org/r/20180210145314.22174-1-bp@alien8.de [ Cleaned up the changelog. ] Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3a8e88a611eb..75f405ac085c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -234,7 +234,7 @@ static void __print_mce(struct mce *m) m->cs, m->ip); if (m->cs == __KERNEL_CS) - pr_cont("{%pS}", (void *)m->ip); + pr_cont("{%pS}", (void *)(unsigned long)m->ip); pr_cont("\n"); } -- cgit v1.2.3-70-g09d2 From d37fc6d360a404b208547ba112e7dabb6533c7fc Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw@amazon.co.uk> Date: Mon, 12 Feb 2018 15:27:34 +0000 Subject: x86/speculation: Correct Speculation Control microcode blacklist again Arjan points out that the Intel document only clears the 0xc2 microcode on *some* parts with CPUID 506E3 (INTEL_FAM6_SKYLAKE_DESKTOP stepping 3). For the Skylake H/S platform it's OK but for Skylake E3 which has the same CPUID it isn't (yet) cleared. So removing it from the blacklist was premature. Put it back for now. Also, Arjan assures me that the 0x84 microcode for Kaby Lake which was featured in one of the early revisions of the Intel document was never released to the public, and won't be until/unless it is also validated as safe. So those can change to 0x80 which is what all *other* versions of the doc have identified. Once the retrospective testing of existing public microcodes is done, we should be back into a mode where new microcodes are only released in batches and we shouldn't even need to update the blacklist for those anyway, so this tweaking of the list isn't expected to be a thing which keeps happening. Requested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Link: http://lkml.kernel.org/r/1518449255-2182-1-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/intel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f73b8148dd55..ef796f14f7ae 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -116,13 +116,14 @@ struct sku_microcode { u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, - { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, -- cgit v1.2.3-70-g09d2 From f208820a321f9b23d77d7eed89945d862d62a3ed Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw@amazon.co.uk> Date: Sat, 10 Feb 2018 23:39:23 +0000 Subject: Revert "x86/speculation: Simplify indirect_branch_prediction_barrier()" This reverts commit 64e16720ea0879f8ab4547e3b9758936d483909b. We cannot call C functions like that, without marking all the call-clobbered registers as, well, clobbered. We might have got away with it for now because the __ibp_barrier() function was *fairly* unlikely to actually use any other registers. But no. Just no. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: arjan.van.de.ven@intel.com Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/1518305967-31356-3-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/nospec-branch.h | 13 +++++++++---- arch/x86/include/asm/processor.h | 3 --- arch/x86/kernel/cpu/bugs.c | 6 ------ 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4d57894635f2..300cc159b4a0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -164,10 +164,15 @@ static inline void vmexit_fill_RSB(void) static inline void indirect_branch_prediction_barrier(void) { - alternative_input("", - "call __ibp_barrier", - X86_FEATURE_USE_IBPB, - ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_USE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 513f9604c192..99799fbd0f7e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -969,7 +969,4 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); - -void __ibp_barrier(void); - #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 71949bf2de5a..61152aa53377 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -337,9 +337,3 @@ ssize_t cpu_show_spectre_v2(struct device *dev, spectre_v2_module_string()); } #endif - -void __ibp_barrier(void) -{ - __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); -} -EXPORT_SYMBOL_GPL(__ibp_barrier); -- cgit v1.2.3-70-g09d2 From 21e433bdb95bdf3aa48226fd3d33af608437f293 Mon Sep 17 00:00:00 2001 From: Ingo Molnar <mingo@kernel.org> Date: Tue, 13 Feb 2018 09:03:08 +0100 Subject: x86/speculation: Clean up various Spectre related details Harmonize all the Spectre messages so that a: dmesg | grep -i spectre ... gives us most Spectre related kernel boot messages. Also fix a few other details: - clarify a comment about firmware speculation control - s/KPTI/PTI - remove various line-breaks that made the code uglier Acked-by: David Woodhouse <dwmw@amazon.co.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/bugs.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 61152aa53377..4acf16a76d1e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); if (ret < 0) return SPECTRE_V2_CMD_AUTO; @@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } } @@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", - mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void) goto retpoline_auto; break; } - pr_err("kernel not compiled with retpoline; no mitigation available!"); + pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); return; retpoline_auto: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { retpoline_amd: if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : @@ -281,7 +278,7 @@ retpoline_auto: pr_info("%s\n", spectre_v2_strings[mode]); /* - * If neither SMEP or KPTI are available, there is a risk of + * If neither SMEP nor PTI are available, there is a risk of * hitting userspace addresses in the RSB after a context switch * from a shallow call stack to a deeper one. To prevent this fill * the entire RSB, even when using IBRS. @@ -295,21 +292,20 @@ retpoline_auto: if ((!boot_cpu_has(X86_FEATURE_PTI) && !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Filling RSB on context switch\n"); + pr_info("Spectre v2 mitigation: Filling RSB on context switch\n"); } /* Initialize Indirect Branch Prediction Barrier if supported */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Enabling Indirect Branch Prediction Barrier\n"); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } } #undef pr_fmt #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return sprintf(buf, "Not affected\n"); @@ -318,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev, return sprintf(buf, "Vulnerable\n"); } -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); -- cgit v1.2.3-70-g09d2 From 295cc7eb314eb3321fb6d67ca6f7305f5c50d10f Mon Sep 17 00:00:00 2001 From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> Date: Thu, 8 Feb 2018 09:19:08 -0500 Subject: x86/smpboot: Fix uncore_pci_remove() indexing bug when hot-removing a physical CPU When a physical CPU is hot-removed, the following warning messages are shown while the uncore device is removed in uncore_pci_remove(): WARNING: CPU: 120 PID: 5 at arch/x86/events/intel/uncore.c:988 uncore_pci_remove+0xf1/0x110 ... CPU: 120 PID: 5 Comm: kworker/u1024:0 Not tainted 4.15.0-rc8 #1 Workqueue: kacpi_hotplug acpi_hotplug_work_fn ... Call Trace: pci_device_remove+0x36/0xb0 device_release_driver_internal+0x145/0x210 pci_stop_bus_device+0x76/0xa0 pci_stop_root_bus+0x44/0x60 acpi_pci_root_remove+0x1f/0x80 acpi_bus_trim+0x54/0x90 acpi_bus_trim+0x2e/0x90 acpi_device_hotplug+0x2bc/0x4b0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x141/0x340 worker_thread+0x47/0x3e0 kthread+0xf5/0x130 When uncore_pci_remove() runs, it tries to get the package ID to clear the value of uncore_extra_pci_dev[].dev[] by using topology_phys_to_logical_pkg(). The warning messesages are shown because topology_phys_to_logical_pkg() returns -1. arch/x86/events/intel/uncore.c: static void uncore_pci_remove(struct pci_dev *pdev) { ... phys_id = uncore_pcibus_to_physid(pdev->bus); ... pkg = topology_phys_to_logical_pkg(phys_id); // returns -1 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { uncore_extra_pci_dev[pkg].dev[i] = NULL; break; } } WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); // <=========== HERE!! topology_phys_to_logical_pkg() tries to find cpuinfo_x86->phys_proc_id that matches the phys_pkg argument. arch/x86/kernel/smpboot.c: int topology_phys_to_logical_pkg(unsigned int phys_pkg) { int cpu; for_each_possible_cpu(cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); if (c->initialized && c->phys_proc_id == phys_pkg) return c->logical_proc_id; } return -1; } However, the phys_proc_id was already set to 0 by remove_siblinginfo() when the CPU was offlined. So, topology_phys_to_logical_pkg() cannot find the correct logical_proc_id and always returns -1. As the result, uncore_pci_remove() calls WARN_ON_ONCE() and the warning messages are shown. What is worse is that the bogus 'pkg' index results in two bugs: - We dereference uncore_extra_pci_dev[] with a negative index - We fail to clean up a stale pointer in uncore_extra_pci_dev[][] To fix these bugs, remove the clearing of ->phys_proc_id from remove_siblinginfo(). This should not cause any problems, because ->phys_proc_id is not used after it is hot-removed and it is re-set while hot-adding. Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: yasu.isimatu@gmail.com Cc: <stable@vger.kernel.org> Fixes: 30bb9811856f ("x86/topology: Avoid wasting 128k for package id array") Link: http://lkml.kernel.org/r/ed738d54-0f01-b38b-b794-c31dc118c207@gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/smpboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f27facbaa9b..cfc61e1d45e2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1430,7 +1430,6 @@ static void remove_siblinginfo(int cpu) cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); - c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); -- cgit v1.2.3-70-g09d2 From c25d99d20ba69824a1e2cc118e04b877cd427afc Mon Sep 17 00:00:00 2001 From: "mike.travis@hpe.com" <mike.travis@hpe.com> Date: Mon, 5 Feb 2018 16:15:04 -0600 Subject: x86/platform/UV: Fix GAM Range Table entries less than 1GB The latest UV platforms include the new ApachePass NVDIMMs into the UV address space. This has introduced address ranges in the Global Address Map Table that are less than the previous lowest range, which was 2GB. Fix the address calculation so it accommodates address ranges from bytes to exabytes. Signed-off-by: Mike Travis <mike.travis@hpe.com> Reviewed-by: Andrew Banman <andrew.banman@hpe.com> Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Russ Anderson <russ.anderson@hpe.com> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20180205221503.190219903@stormcage.americas.sgi.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/apic/x2apic_uv_x.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 46b675aaf20b..f11910b44638 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) uv_gre_table = gre; for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { + unsigned long size = ((unsigned long)(gre->limit - lgre) + << UV_GAM_RANGE_SHFT); + int order = 0; + char suffix[] = " KMGTPE"; + + while (size > 9999 && order < sizeof(suffix)) { + size /= 1024; + order++; + } + if (!index) { pr_info("UV: GAM Range Table...\n"); pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); } - pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n", + pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n", index++, (unsigned long)lgre << UV_GAM_RANGE_SHFT, (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, - ((unsigned long)(gre->limit - lgre)) >> - (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */ + size, suffix[order], gre->type, gre->nasid, gre->sockid, gre->pnode); lgre = gre->limit; -- cgit v1.2.3-70-g09d2 From fd0e786d9d09024f67bd71ec094b110237dc3840 Mon Sep 17 00:00:00 2001 From: Tony Luck <tony.luck@intel.com> Date: Thu, 25 Jan 2018 14:23:48 -0800 Subject: x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages In the following commit: ce0fa3e56ad2 ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages") ... we added code to memory_failure() to unmap the page from the kernel 1:1 virtual address space to avoid speculative access to the page logging additional errors. But memory_failure() may not always succeed in taking the page offline, especially if the page belongs to the kernel. This can happen if there are too many corrected errors on a page and either mcelog(8) or drivers/ras/cec.c asks to take a page offline. Since we remove the 1:1 mapping early in memory_failure(), we can end up with the page unmapped, but still in use. On the next access the kernel crashes :-( There are also various debug paths that call memory_failure() to simulate occurrence of an error. Since there is no actual error in memory, we don't need to map out the page for those cases. Revert most of the previous attempt and keep the solution local to arch/x86/kernel/cpu/mcheck/mce.c. Unmap the page only when: 1) there is a real error 2) memory_failure() succeeds. All of this only applies to 64-bit systems. 32-bit kernel doesn't map all of memory into kernel space. It isn't worth adding the code to unmap the piece that is mapped because nobody would run a 32-bit kernel on a machine that has recoverable machine checks. Signed-off-by: Tony Luck <tony.luck@intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave <dave.hansen@intel.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robert (Persistent Memory) <elliott@hpe.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org #v4.14 Fixes: ce0fa3e56ad2 ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages") Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/page_64.h | 4 ---- arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 +++++++++++++++ arch/x86/kernel/cpu/mcheck/mce.c | 17 +++++++++++------ include/linux/mm_inline.h | 6 ------ mm/memory-failure.c | 2 -- 5 files changed, 26 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bceb232..d652a3808065 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -52,10 +52,6 @@ static inline void clear_page(void *page) void copy_page(void *to, void *from); -#ifdef CONFIG_X86_MCE -#define arch_unmap_kpfn arch_unmap_kpfn -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index aa0d5df9dc60..e956eb267061 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { } extern struct mca_config mca_cfg; +#ifndef CONFIG_X86_64 +/* + * On 32-bit systems it would be difficult to safely unmap a poison page + * from the kernel 1:1 map because there are no non-canonical addresses that + * we can use to refer to the address without risking a speculative access. + * However, this isn't much of an issue because: + * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which + * are only mapped into the kernel as needed + * 2) Few people would run a 32-bit kernel on a machine that supports + * recoverable errors because they have too much memory to boot 32-bit. + */ +static inline void mce_unmap_kpfn(unsigned long pfn) {} +#define mce_unmap_kpfn mce_unmap_kpfn +#endif + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 75f405ac085c..8ff94d1e2dce 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -105,6 +105,10 @@ static struct irq_work mce_irq_work; static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn); +#endif + /* * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. @@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; - memory_failure(pfn, 0); + if (!memory_failure(pfn, 0)) + mce_unmap_kpfn(pfn); } return NOTIFY_OK; @@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m) ret = memory_failure(m->addr >> PAGE_SHIFT, flags); if (ret) pr_err("Memory error not recovered"); + else + mce_unmap_kpfn(m->addr >> PAGE_SHIFT); return ret; } -#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE) - -void arch_unmap_kpfn(unsigned long pfn) +#ifndef mce_unmap_kpfn +static void mce_unmap_kpfn(unsigned long pfn) { unsigned long decoy_addr; @@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn) * We would like to just call: * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a - * speculative access to the posion page because we'd have + * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address @@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn) if (set_memory_np(decoy_addr, 1)) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); - } #endif diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index c30b32e3c862..10191c28fc04 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) -#ifdef arch_unmap_kpfn -extern void arch_unmap_kpfn(unsigned long pfn); -#else -static __always_inline void arch_unmap_kpfn(unsigned long pfn) { } -#endif - #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4b80ccee4535..8291b75f42c8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags) return 0; } - arch_unmap_kpfn(pfn); - orig_head = hpage = compound_head(p); num_poisoned_pages_inc(); -- cgit v1.2.3-70-g09d2 From 3b3a371cc9bc980429baabe0a8e5f307f3d1f463 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@infradead.org> Date: Fri, 9 Feb 2018 13:16:59 +0100 Subject: x86/debug: Use UD2 for WARN() Since the Intel SDM added an ModR/M byte to UD0 and binutils followed that specification, we now cannot disassemble our kernel anymore. This now means Intel and AMD disagree on the encoding of UD0. And instead of playing games with additional bytes that are valid ModR/M and single byte instructions (0xd6 for instance), simply use UD2 for both WARN() and BUG(). Requested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20180208194406.GD25181@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/bug.h | 15 ++++++--------- arch/x86/kernel/traps.c | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 71e6f4bf9161..6804d6642767 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -5,23 +5,20 @@ #include <linux/stringify.h> /* - * Since some emulators terminate on UD2, we cannot use it for WARN. - * Since various instruction decoders disagree on the length of UD1, - * we cannot use it either. So use UD0 for WARN. + * Despite that some emulators terminate on UD2, we use it for WARN(). * - * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas - * our kernel decoder thinks it takes a ModRM byte, which seems consistent - * with various things like the Intel SDM instruction encoding rules) + * Since various instruction decoders/specs disagree on the encoding of + * UD0/UD1. */ -#define ASM_UD0 ".byte 0x0f, 0xff" +#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */ #define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */ #define ASM_UD2 ".byte 0x0f, 0x0b" #define INSN_UD0 0xff0f #define INSN_UD2 0x0b0f -#define LEN_UD0 2 +#define LEN_UD2 2 #ifdef CONFIG_GENERIC_BUG @@ -79,7 +76,7 @@ do { \ #define __WARN_FLAGS(flags) \ do { \ - _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags)); \ + _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ annotate_reachable(); \ } while (0) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 446c9ef8cfc3..3d9b2308e7fa 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -181,7 +181,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr) break; case BUG_TRAP_TYPE_WARN: - regs->ip += LEN_UD0; + regs->ip += LEN_UD2; return 1; } -- cgit v1.2.3-70-g09d2 From 1299ef1d8870d2d9f09a5aadf2f8b2c887c2d033 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@kernel.org> Date: Wed, 31 Jan 2018 08:03:10 -0800 Subject: x86/mm: Rename flush_tlb_single() and flush_tlb_one() to __flush_tlb_one_[user|kernel]() flush_tlb_single() and flush_tlb_one() sound almost identical, but they really mean "flush one user translation" and "flush one kernel translation". Rename them to flush_tlb_one_user() and flush_tlb_one_kernel() to make the semantics more obvious. [ I was looking at some PTI-related code, and the flush-one-address code is unnecessarily hard to understand because the names of the helpers are uninformative. This came up during PTI review, but no one got around to doing it. ] Signed-off-by: Andy Lutomirski <luto@kernel.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Hugh Dickins <hughd@google.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Kees Cook <keescook@google.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Linux-MM <linux-mm@kvack.org> Cc: Rik van Riel <riel@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Will Deacon <will.deacon@arm.com> Link: http://lkml.kernel.org/r/3303b02e3c3d049dc5235d5651e0ae6d29a34354.1517414378.git.luto@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/paravirt.h | 4 ++-- arch/x86/include/asm/paravirt_types.h | 2 +- arch/x86/include/asm/pgtable_32.h | 2 +- arch/x86/include/asm/tlbflush.h | 27 ++++++++++++++++++++------- arch/x86/kernel/paravirt.c | 6 +++--- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/kmmio.c | 2 +- arch/x86/mm/pgtable_32.c | 2 +- arch/x86/mm/tlb.c | 6 +++--- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/xen/mmu_pv.c | 6 +++--- include/trace/events/xen.h | 2 +- 13 files changed, 39 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 892df375b615..554841fab717 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -297,9 +297,9 @@ static inline void __flush_tlb_global(void) { PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); } -static inline void __flush_tlb_single(unsigned long addr) +static inline void __flush_tlb_one_user(unsigned long addr) { - PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); + PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr); } static inline void flush_tlb_others(const struct cpumask *cpumask, diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6ec54d01972d..f624f1f10316 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -217,7 +217,7 @@ struct pv_mmu_ops { /* TLB operations */ void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); - void (*flush_tlb_single)(unsigned long addr); + void (*flush_tlb_one_user)(unsigned long addr); void (*flush_tlb_others)(const struct cpumask *cpus, const struct flush_tlb_info *info); diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e67c0620aec2..e55466760ff8 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -61,7 +61,7 @@ void paging_init(void); #define kpte_clear_flush(ptep, vaddr) \ do { \ pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one((vaddr)); \ + __flush_tlb_one_kernel((vaddr)); \ } while (0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 2b8f18ca5874..84137c22fdfa 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) #else #define __flush_tlb() __native_flush_tlb() #define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) #endif static inline bool tlb_defer_switch_to_init_mm(void) @@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void) /* * flush one page in the user mapping */ -static inline void __native_flush_tlb_single(unsigned long addr) +static inline void __native_flush_tlb_one_user(unsigned long addr) { u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); @@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void) /* * flush one page in the kernel mapping */ -static inline void __flush_tlb_one(unsigned long addr) +static inline void __flush_tlb_one_kernel(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - __flush_tlb_single(addr); + + /* + * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its + * paravirt equivalent. Even with PCID, this is sufficient: we only + * use PCID if we also use global PTEs for the kernel mapping, and + * INVLPG flushes global translations across all address spaces. + * + * If PTI is on, then the kernel is mapped with non-global PTEs, and + * __flush_tlb_one_user() will flush the given address for the current + * kernel address space and for its usermode counterpart, but it does + * not flush it for other address spaces. + */ + __flush_tlb_one_user(addr); if (!static_cpu_has(X86_FEATURE_PTI)) return; /* - * __flush_tlb_single() will have cleared the TLB entry for this ASID, - * but since kernel space is replicated across all, we must also - * invalidate all others. + * See above. We need to propagate the flush to all other address + * spaces. In principle, we only need to propagate it to kernelmode + * address spaces, but the extra bookkeeping we would need is not + * worth it. */ invalidate_other_asid(); } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 041096bdef86..99dc79e76bdc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -200,9 +200,9 @@ static void native_flush_tlb_global(void) __native_flush_tlb_global(); } -static void native_flush_tlb_single(unsigned long addr) +static void native_flush_tlb_one_user(unsigned long addr) { - __native_flush_tlb_single(addr); + __native_flush_tlb_one_user(addr); } struct static_key paravirt_steal_enabled; @@ -401,7 +401,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .flush_tlb_user = native_flush_tlb, .flush_tlb_kernel = native_flush_tlb_global, - .flush_tlb_single = native_flush_tlb_single, + .flush_tlb_one_user = native_flush_tlb_one_user, .flush_tlb_others = native_flush_tlb_others, .pgd_alloc = __paravirt_pgd_alloc, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4a837289f2ad..60ae1fe3609f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -256,7 +256,7 @@ static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte) * It's enough to flush this one mapping. * (PGE mappings get flushed as well) */ - __flush_tlb_one(vaddr); + __flush_tlb_one_kernel(vaddr); } void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c45b6ec5357b..e2db83bebc3b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -820,5 +820,5 @@ void __init __early_set_fixmap(enum fixed_addresses idx, set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else pte_clear(&init_mm, addr, pte); - __flush_tlb_one(addr); + __flush_tlb_one_kernel(addr); } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 58477ec3d66d..7c8686709636 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -168,7 +168,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->addr); + __flush_tlb_one_kernel(f->addr); return 0; } diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index c3c5274410a9..9bb7f0ab9fe6 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -63,7 +63,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) * It's enough to flush this one mapping. * (PGE mappings get flushed as well) */ - __flush_tlb_one(vaddr); + __flush_tlb_one_kernel(vaddr); } unsigned long __FIXADDR_TOP = 0xfffff000; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 012d02624848..0c936435ea93 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -492,7 +492,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, * flush that changes context.tlb_gen from 2 to 3. If they get * processed on this CPU in reverse order, we'll see * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL. - * If we were to use __flush_tlb_single() and set local_tlb_gen to + * If we were to use __flush_tlb_one_user() and set local_tlb_gen to * 3, we'd be break the invariant: we'd update local_tlb_gen above * 1 without the full flush that's needed for tlb_gen 2. * @@ -513,7 +513,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, addr = f->start; while (addr < f->end) { - __flush_tlb_single(addr); + __flush_tlb_one_user(addr); addr += PAGE_SIZE; } if (local) @@ -660,7 +660,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_one(addr); + __flush_tlb_one_kernel(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 8538a6723171..7d5d53f36a7a 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_single(msg->address); + __flush_tlb_one_user(msg->address); stat->d_onetlb++; } stat->d_requestee++; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d85076223a69..aae88fec9941 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1300,12 +1300,12 @@ static void xen_flush_tlb(void) preempt_enable(); } -static void xen_flush_tlb_single(unsigned long addr) +static void xen_flush_tlb_one_user(unsigned long addr) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_single(addr); + trace_xen_mmu_flush_tlb_one_user(addr); preempt_disable(); @@ -2370,7 +2370,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_one_user = xen_flush_tlb_one_user, .flush_tlb_others = xen_flush_tlb_others, .pgd_alloc = xen_pgd_alloc, diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index b8adf05c534e..7dd8f34c37df 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -368,7 +368,7 @@ TRACE_EVENT(xen_mmu_flush_tlb, TP_printk("%s", "") ); -TRACE_EVENT(xen_mmu_flush_tlb_single, +TRACE_EVENT(xen_mmu_flush_tlb_one_user, TP_PROTO(unsigned long addr), TP_ARGS(addr), TP_STRUCT__entry( -- cgit v1.2.3-70-g09d2 From b399151cb48db30ad1e0e93dd40d68c6d007b637 Mon Sep 17 00:00:00 2001 From: Jia Zhang <qianyue.zj@alibaba-inc.com> Date: Mon, 1 Jan 2018 09:52:10 +0800 Subject: x86/cpu: Rename cpu_data.x86_mask to cpu_data.x86_stepping x86_mask is a confusing name which is hard to associate with the processor's stepping. Additionally, correct an indent issue in lib/cpu.c. Signed-off-by: Jia Zhang <qianyue.zj@alibaba-inc.com> [ Updated it to more recent kernels. ] Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: bp@alien8.de Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1514771530-70829-1-git-send-email-qianyue.zj@alibaba-inc.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/lbr.c | 2 +- arch/x86/events/intel/p6.c | 2 +- arch/x86/include/asm/acpi.h | 2 +- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/amd_nb.c | 2 +- arch/x86/kernel/apic/apic.c | 6 +++--- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/cpu/amd.c | 28 ++++++++++++++-------------- arch/x86/kernel/cpu/centaur.c | 4 ++-- arch/x86/kernel/cpu/common.c | 8 ++++---- arch/x86/kernel/cpu/cyrix.c | 2 +- arch/x86/kernel/cpu/intel.c | 18 +++++++++--------- arch/x86/kernel/cpu/intel_rdt.c | 2 +- arch/x86/kernel/cpu/microcode/intel.c | 4 ++-- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- arch/x86/kernel/cpu/mtrr/main.c | 4 ++-- arch/x86/kernel/cpu/proc.c | 4 ++-- arch/x86/kernel/head_32.S | 4 ++-- arch/x86/kernel/mpparse.c | 2 +- arch/x86/lib/cpu.c | 2 +- drivers/char/hw_random/via-rng.c | 2 +- drivers/cpufreq/acpi-cpufreq.c | 2 +- drivers/cpufreq/longhaul.c | 6 +++--- drivers/cpufreq/p4-clockmod.c | 2 +- drivers/cpufreq/powernow-k7.c | 2 +- drivers/cpufreq/speedstep-centrino.c | 4 ++-- drivers/cpufreq/speedstep-lib.c | 6 +++--- drivers/crypto/padlock-aes.c | 2 +- drivers/edac/amd64_edac.c | 2 +- drivers/hwmon/coretemp.c | 6 +++--- drivers/hwmon/hwmon-vid.c | 2 +- drivers/hwmon/k10temp.c | 2 +- drivers/hwmon/k8temp.c | 2 +- drivers/video/fbdev/geode/video_gx.c | 2 +- 35 files changed, 73 insertions(+), 73 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 731153a4681e..56457cb73448 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu) break; case INTEL_FAM6_SANDYBRIDGE_X: - switch (cpu_data(cpu).x86_mask) { + switch (cpu_data(cpu).x86_stepping) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index ae64d0b69729..cf372b90557e 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void) * on PMU interrupt */ if (boot_cpu_data.x86_model == 28 - && boot_cpu_data.x86_mask < 10) { + && boot_cpu_data.x86_stepping < 10) { pr_cont("LBR disabled due to erratum"); return; } diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a5604c352930..408879b0c0d4 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = { static __init void p6_pmu_rdpmc_quirk(void) { - if (boot_cpu_data.x86_mask < 9) { + if (boot_cpu_data.x86_stepping < 9) { /* * PPro erratum 26; fixed in stepping 9 and above. */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8d0ec9df1cbe..f077401869ee 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) if (boot_cpu_data.x86 == 0x0F && boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86_model <= 0x05 && - boot_cpu_data.x86_mask < 0x0A) + boot_cpu_data.x86_stepping < 0x0A) return 1; else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E)) return 1; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 99799fbd0f7e..b7c8583328c7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -91,7 +91,7 @@ struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; - __u8 x86_mask; + __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 6db28f17ff28..c88e0b127810 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -235,7 +235,7 @@ int amd_cache_northbridges(void) if (boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model >= 0x8 && (boot_cpu_data.x86_model > 0x9 || - boot_cpu_data.x86_mask >= 0x1)) + boot_cpu_data.x86_stepping >= 0x1)) amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; if (boot_cpu_data.x86 == 0x15) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 25ddf02598d2..b203af0855b5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static u32 hsx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x3a; /* EP */ case 0x04: return 0x0f; /* EX */ } @@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void) static u32 bdx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x00000011; case 0x03: return 0x0700000e; case 0x04: return 0x0f00000c; @@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void) static u32 skx_deadline_rev(void) { - switch (boot_cpu_data.x86_mask) { + switch (boot_cpu_data.x86_stepping) { case 0x03: return 0x01000136; case 0x04: return 0x02000014; } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index fa1261eefa16..f91ba53e06c8 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -18,7 +18,7 @@ void foo(void) OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); + OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping); OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ea831c858195..e7d5a7883632 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if (c->x86_model == 6 && c->x86_mask == 1) { + if (c->x86_model == 6 && c->x86_stepping == 1) { const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); @@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) /* K6 with old style WHCR */ if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { + (c->x86_model == 8 && c->x86_stepping < 8)) { /* We can only write allocate on the low 508Mb */ if (mbytes > 508) mbytes = 508; @@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if ((c->x86_model == 8 && c->x86_mask > 7) || + if ((c->x86_model == 8 && c->x86_stepping > 7) || c->x86_model == 9 || c->x86_model == 13) { /* The more serious chips .. */ @@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx * As per AMD technical note 27212 0.2 */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", @@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * but they are not certified as MP capable. */ /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) + if ((c->x86_model == 6) && ((c->x86_stepping == 0) || + (c->x86_stepping == 1))) return; /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) + if ((c->x86_model == 7) && (c->x86_stepping == 0)) return; /* @@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for * more. */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || + if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || + ((c->x86_model == 7) && (c->x86_stepping >= 1)) || (c->x86_model > 7)) if (cpu_has(c, X86_FEATURE_MP)) return; @@ -583,7 +583,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* Set MTRR capability flag if appropriate */ if (c->x86 == 5) if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) + (c->x86_model == 8 && c->x86_stepping >= 8)) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) @@ -769,7 +769,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c) * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. */ - if (c->x86_model <= 1 && c->x86_mask <= 1) + if (c->x86_model <= 1 && c->x86_stepping <= 1) set_cpu_cap(c, X86_FEATURE_CPB); } @@ -880,11 +880,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { /* Duron Rev A0 */ - if (c->x86_model == 3 && c->x86_mask == 0) + if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) + (c->x86_stepping == 0 || c->x86_stepping == 1)) size = 256; } return size; @@ -1021,7 +1021,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) } /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_mask; + ms = (cpu->x86_model << 4) | cpu->x86_stepping; while ((range = *erratum++)) if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 68bc6d9b3132..595be776727d 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -136,7 +136,7 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: - switch (c->x86_mask) { + switch (c->x86_stepping) { default: name = "2"; break; @@ -211,7 +211,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) * - Note, it seems this may only be in engineering samples. */ if ((c->x86 == 6) && (c->x86_model == 9) && - (c->x86_mask == 1) && (size == 65)) + (c->x86_stepping == 1) && (size == 65)) size -= 1; return size; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d63f4b5706e4..a7d8df641a4c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = x86_family(tfms); c->x86_model = x86_model(tfms); - c->x86_mask = x86_stepping(tfms); + c->x86_stepping = x86_stepping(tfms); if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; @@ -1186,7 +1186,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; @@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); - if (c->x86_mask || c->cpuid_level >= 0) - pr_cont(", stepping: 0x%x)\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + pr_cont(", stepping: 0x%x)\n", c->x86_stepping); else pr_cont(")\n"); } diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6b4bb335641f..8949b7ae6d92 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* common case step number/rev -- exceptions handled below */ c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; + c->x86_stepping = dir1 & 0xf; /* Now cook; the original recipe is by Channing Corn, from Cyrix. * We do the same thing for each generation: we work out diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ef796f14f7ae..d19e903214b4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -146,7 +146,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && - c->x86_mask == spectre_bad_microcodes[i].stepping) + c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } return false; @@ -193,7 +193,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && + if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -209,7 +209,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* @@ -307,7 +307,7 @@ int ppro_with_ram_bug(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { + boot_cpu_data.x86_stepping < 8) { pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; } @@ -324,7 +324,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c) * Mask B, Pentium, but not Pentium MMX */ if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_stepping >= 1 && c->x86_stepping <= 4 && c->x86_model <= 3) { /* * Remember we have B step Pentia with bugs @@ -367,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); /* @@ -385,7 +385,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); @@ -400,7 +400,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Specification Update"). */ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); @@ -647,7 +647,7 @@ static void init_intel(struct cpuinfo_x86 *c) case 6: if (l2 == 128) p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) + else if (c->x86_stepping == 0 || c->x86_stepping == 5) p = "Celeron-A"; break; diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 99442370de40..18dd8f22e353 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -771,7 +771,7 @@ static __init void rdt_quirks(void) cache_alloc_hsw_probe(); break; case INTEL_FAM6_SKYLAKE_X: - if (boot_cpu_data.x86_mask <= 4) + if (boot_cpu_data.x86_stepping <= 4) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); } } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f7c55b0e753a..b94279bb5c04 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -921,7 +921,7 @@ static bool is_blacklisted(unsigned int cpu) */ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X && - c->x86_mask == 0x01 && + c->x86_stepping == 0x01 && llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); @@ -944,7 +944,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, return UCODE_NFOUND; sprintf(name, "intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); + c->x86, c->x86_model, c->x86_stepping); if (request_firmware_direct(&firmware, name, device)) { pr_debug("data file %s load failed\n", name); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fdc55215d44d..e12ee86906c6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, */ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { + boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 40d5a8a75212..7468de429087 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -711,8 +711,8 @@ void __init mtrr_bp_init(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) + (boot_cpu_data.x86_stepping == 0x3 || + boot_cpu_data.x86_stepping == 0x4)) phys_addr = 36; size_or_mask = SIZE_OR_MASK_BITS(phys_addr); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index e7ecedafa1c8..ee4cc388e8d3 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) c->x86_model, c->x86_model_id[0] ? c->x86_model_id : "unknown"); - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_stepping); else seq_puts(m, "stepping\t: unknown\n"); if (c->microcode) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c29020907886..b59e4fb40fd9 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -37,7 +37,7 @@ #define X86 new_cpu_data+CPUINFO_x86 #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor #define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask +#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability @@ -332,7 +332,7 @@ ENTRY(startup_32_smp) shrb $4,%al movb %al,X86_MODEL andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK + movb %cl,X86_STEPPING movl %edx,X86_CAPABILITY .Lis486: diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3a4b12809ab5..bc6bc6689e68 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -407,7 +407,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping; processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; processor.reserved[0] = 0; processor.reserved[1] = 0; diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index d6f848d1211d..2dd1fe13a37b 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -18,7 +18,7 @@ unsigned int x86_model(unsigned int sig) { unsigned int fam, model; - fam = x86_family(sig); + fam = x86_family(sig); model = (sig >> 4) & 0xf; diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index d1f5bb534e0e..6e9df558325b 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -162,7 +162,7 @@ static int via_rng_init(struct hwrng *rng) /* Enable secondary noise source on CPUs where it is present. */ /* Nehemiah stepping 8 and higher */ - if ((c->x86_model == 9) && (c->x86_mask > 7)) + if ((c->x86_model == 9) && (c->x86_stepping > 7)) lo |= VIA_NOISESRC2; /* Esther */ diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 3a2ca0f79daf..d0c34df0529c 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -629,7 +629,7 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) if (c->x86_vendor == X86_VENDOR_INTEL) { if ((c->x86 == 15) && (c->x86_model == 6) && - (c->x86_mask == 8)) { + (c->x86_stepping == 8)) { pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n"); return -ENODEV; } diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c46a12df40dd..d5e27bc7585a 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -775,7 +775,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) break; case 7: - switch (c->x86_mask) { + switch (c->x86_stepping) { case 0: longhaul_version = TYPE_LONGHAUL_V1; cpu_model = CPU_SAMUEL2; @@ -787,7 +787,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) break; case 1 ... 15: longhaul_version = TYPE_LONGHAUL_V2; - if (c->x86_mask < 8) { + if (c->x86_stepping < 8) { cpu_model = CPU_SAMUEL2; cpuname = "C3 'Samuel 2' [C5B]"; } else { @@ -814,7 +814,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) numscales = 32; memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); - switch (c->x86_mask) { + switch (c->x86_stepping) { case 0 ... 1: cpu_model = CPU_NEHEMIAH; cpuname = "C3 'Nehemiah A' [C5XLOE]"; diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index fd77812313f3..a25741b1281b 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -168,7 +168,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) #endif /* Errata workaround */ - cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; + cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_stepping; switch (cpuid) { case 0x0f07: case 0x0f0a: diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 80ac313e6c59..302e9ce793a0 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -131,7 +131,7 @@ static int check_powernow(void) return 0; } - if ((c->x86_model == 6) && (c->x86_mask == 0)) { + if ((c->x86_model == 6) && (c->x86_stepping == 0)) { pr_info("K7 660[A0] core detected, enabling errata workarounds\n"); have_a0 = 1; } diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index 41bc5397f4bb..4fa5adf16c70 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -37,7 +37,7 @@ struct cpu_id { __u8 x86; /* CPU family */ __u8 x86_model; /* model */ - __u8 x86_mask; /* stepping */ + __u8 x86_stepping; /* stepping */ }; enum { @@ -277,7 +277,7 @@ static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, { if ((c->x86 == x->x86) && (c->x86_model == x->x86_model) && - (c->x86_mask == x->x86_mask)) + (c->x86_stepping == x->x86_stepping)) return 1; return 0; } diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index 8085ec9000d1..e3a9962ee410 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -272,9 +272,9 @@ unsigned int speedstep_detect_processor(void) ebx = cpuid_ebx(0x00000001); ebx &= 0x000000FF; - pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + pr_debug("ebx value is %x, x86_stepping is %x\n", ebx, c->x86_stepping); - switch (c->x86_mask) { + switch (c->x86_stepping) { case 4: /* * B-stepping [M-P4-M] @@ -361,7 +361,7 @@ unsigned int speedstep_detect_processor(void) msr_lo, msr_hi); if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { - if (c->x86_mask == 0x01) { + if (c->x86_stepping == 0x01) { pr_debug("early PIII version\n"); return SPEEDSTEP_CPU_PIII_C_EARLY; } else diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 4b6642a25df5..1c6cbda56afe 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -512,7 +512,7 @@ static int __init padlock_init(void) printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); - if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { + if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) { ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS; cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS; printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8b16ec595fa7..329cb96f886f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3147,7 +3147,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) struct amd64_family_type *fam_type = NULL; pvt->ext_model = boot_cpu_data.x86_model >> 4; - pvt->stepping = boot_cpu_data.x86_mask; + pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index c13a4fd86b3c..a42744c7665b 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -268,13 +268,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) { const struct tjmax_model *tm = &tjmax_model_table[i]; if (c->x86_model == tm->model && - (tm->mask == ANY || c->x86_mask == tm->mask)) + (tm->mask == ANY || c->x86_stepping == tm->mask)) return tm->tjmax; } /* Early chips have no MSR for TjMax */ - if (c->x86_model == 0xf && c->x86_mask < 4) + if (c->x86_model == 0xf && c->x86_stepping < 4) usemsr_ee = 0; if (c->x86_model > 0xe && usemsr_ee) { @@ -425,7 +425,7 @@ static int chk_ucode_version(unsigned int cpu) * Readings might stop update when processor visited too deep sleep, * fixed for stepping D0 (6EC). */ - if (c->x86_model == 0xe && c->x86_mask < 0xc && c->microcode < 0x39) { + if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); return -ENODEV; } diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index ef91b8a67549..84e91286fc4f 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -293,7 +293,7 @@ u8 vid_which_vrm(void) if (c->x86 < 6) /* Any CPU with family lower than 6 */ return 0; /* doesn't have VID */ - vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_mask, c->x86_vendor); + vrm_ret = find_vrm(c->x86, c->x86_model, c->x86_stepping, c->x86_vendor); if (vrm_ret == 134) vrm_ret = get_via_model_d_vrm(); if (vrm_ret == 0) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 0721e175664a..b960015cb073 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -226,7 +226,7 @@ static bool has_erratum_319(struct pci_dev *pdev) * and AM3 formats, but that's the best we can do. */ return boot_cpu_data.x86_model < 4 || - (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask <= 2); + (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_stepping <= 2); } static int k10temp_probe(struct pci_dev *pdev, diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c index 5a632bcf869b..e59f9113fb93 100644 --- a/drivers/hwmon/k8temp.c +++ b/drivers/hwmon/k8temp.c @@ -187,7 +187,7 @@ static int k8temp_probe(struct pci_dev *pdev, return -ENOMEM; model = boot_cpu_data.x86_model; - stepping = boot_cpu_data.x86_mask; + stepping = boot_cpu_data.x86_stepping; /* feature available since SH-C0, exclude older revisions */ if ((model == 4 && stepping == 0) || diff --git a/drivers/video/fbdev/geode/video_gx.c b/drivers/video/fbdev/geode/video_gx.c index 6082f653c68a..67773e8bbb95 100644 --- a/drivers/video/fbdev/geode/video_gx.c +++ b/drivers/video/fbdev/geode/video_gx.c @@ -127,7 +127,7 @@ void gx_set_dclk_frequency(struct fb_info *info) int timeout = 1000; /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */ - if (cpu_data(0).x86_mask == 1) { + if (cpu_data(0).x86_stepping == 1) { pll_table = gx_pll_table_14MHz; pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz); } else { -- cgit v1.2.3-70-g09d2 From 9de29eac8d2189424d81c0d840cd0469aa3d41c8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@oracle.com> Date: Wed, 14 Feb 2018 10:14:17 +0300 Subject: x86/spectre: Fix an error message If i == ARRAY_SIZE(mitigation_options) then we accidentally print garbage from one space beyond the end of the mitigation_options[] array. Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@suse.de> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: KarimAllah Ahmed <karahmed@amazon.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: kernel-janitors@vger.kernel.org Fixes: 9005c6834c0f ("x86/spectre: Simplify spectre_v2 command line parsing") Link: http://lkml.kernel.org/r/20180214071416.GA26677@mwanda Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4acf16a76d1e..d71c8b54b696 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -174,7 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", mitigation_options[i].option); + pr_err("unknown option (%s). Switching to AUTO select\n", arg); return SPECTRE_V2_CMD_AUTO; } } -- cgit v1.2.3-70-g09d2 From 24dbc6000f4b9b0ef5a9daecb161f1907733765a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" <garsilva@embeddedor.com> Date: Tue, 13 Feb 2018 13:22:08 -0600 Subject: x86/cpu: Change type of x86_cache_size variable to unsigned int Currently, x86_cache_size is of type int, which makes no sense as we will never have a valid cache size equal or less than 0. So instead of initializing this variable to -1, it can perfectly be initialized to 0 and use it as an unsigned variable instead. Suggested-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Addresses-Coverity-ID: 1464429 Link: http://lkml.kernel.org/r/20180213192208.GA26414@embeddedor.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/proc.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b7c8583328c7..44c2c4ec6d60 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -109,7 +109,7 @@ struct cpuinfo_x86 { char x86_vendor_id[16]; char x86_model_id[64]; /* in KB - valid for CPUS which support this call: */ - int x86_cache_size; + unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values: */ int x86_cache_max_rmid; /* max index */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a7d8df641a4c..824aee0117bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1184,7 +1184,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) int i; c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; + c->x86_cache_size = 0; c->x86_vendor = X86_VENDOR_UNKNOWN; c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index b94279bb5c04..a15db2b4e0d6 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -982,7 +982,7 @@ static struct microcode_ops microcode_intel_ops = { static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) { - u64 llc_size = c->x86_cache_size * 1024; + u64 llc_size = c->x86_cache_size * 1024ULL; do_div(llc_size, c->x86_max_cores); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index ee4cc388e8d3..2c8522a39ed5 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) } /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + if (c->x86_cache_size) + seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size); show_cpuinfo_core(m, c, cpu); show_cpuinfo_misc(m, c); -- cgit v1.2.3-70-g09d2 From 3f1f576a195aa266813cbd4ca70291deb61e0129 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Fri, 16 Feb 2018 12:26:38 +0100 Subject: x86/microcode: Propagate return value from updating functions ... so that callers can know when microcode was updated and act accordingly. Tested-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Ashok Raj <ashok.raj@intel.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20180216112640.11554-2-bp@alien8.de Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/microcode.h | 9 +++++++-- arch/x86/kernel/cpu/microcode/amd.c | 10 +++++----- arch/x86/kernel/cpu/microcode/core.c | 33 +++++++++++++++++---------------- arch/x86/kernel/cpu/microcode/intel.c | 10 +++++----- 4 files changed, 34 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 55520cec8b27..7fb1047d61c7 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -37,7 +37,12 @@ struct cpu_signature { struct device; -enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; +enum ucode_state { + UCODE_OK = 0, + UCODE_UPDATED, + UCODE_NFOUND, + UCODE_ERROR, +}; struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, @@ -54,7 +59,7 @@ struct microcode_ops { * are being called. * See also the "Synchronization" section in microcode_core.c. */ - int (*apply_microcode) (int cpu); + enum ucode_state (*apply_microcode) (int cpu); int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); }; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 330b8462d426..a998e1a7d46f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -static int apply_microcode_amd(int cpu) +static enum ucode_state apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu) p = find_patch(cpu); if (!p) - return 0; + return UCODE_NFOUND; mc_amd = p->data; uci->mc = p->data; @@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu) if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; uci->cpu_sig.rev = rev; - return 0; + return UCODE_OK; } if (__apply_microcode_amd(mc_amd)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); - return -1; + return UCODE_ERROR; } pr_info("CPU%d: new patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; - return 0; + return UCODE_UPDATED; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65f98a2..6fdaf7cf3182 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu) } struct apply_microcode_ctx { - int err; + enum ucode_state err; }; static void apply_microcode_local(void *arg) @@ -489,31 +489,29 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static int reload_for_cpu(int cpu) +static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - int err = 0; if (!uci->valid) - return err; + return UCODE_OK; ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; + if (ustate != UCODE_OK) + return ustate; + + return apply_microcode_on_target(cpu); } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + enum ucode_state tmp_ret = UCODE_OK; unsigned long val; + ssize_t ret = 0; int cpu; - ssize_t ret = 0, tmp_ret; ret = kstrtoul(buf, 0, &val); if (ret) @@ -526,15 +524,18 @@ static ssize_t reload_store(struct device *dev, mutex_lock(µcode_mutex); for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) + if (tmp_ret > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; + /* set retval for the first encountered reload error */ + if (!ret) + ret = -EINVAL; + } } - if (!ret) + + if (!ret && tmp_ret == UCODE_UPDATED) perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a15db2b4e0d6..923054a6b760 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -772,7 +772,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static int apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_intel(int cpu) { struct microcode_intel *mc; struct ucode_cpu_info *uci; @@ -782,7 +782,7 @@ static int apply_microcode_intel(int cpu) /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) - return -1; + return UCODE_ERROR; uci = ucode_cpu_info + cpu; mc = uci->mc; @@ -790,7 +790,7 @@ static int apply_microcode_intel(int cpu) /* Look for a newer patch in our cache: */ mc = find_patch(uci); if (!mc) - return 0; + return UCODE_NFOUND; } /* write microcode via MSR 0x79 */ @@ -801,7 +801,7 @@ static int apply_microcode_intel(int cpu) if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); - return -1; + return UCODE_ERROR; } if (rev != prev_rev) { @@ -818,7 +818,7 @@ static int apply_microcode_intel(int cpu) uci->cpu_sig.rev = rev; c->microcode = rev; - return 0; + return UCODE_UPDATED; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, -- cgit v1.2.3-70-g09d2 From 1008c52c09dcb23d93f8e0ea83a6246265d2cce0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Fri, 16 Feb 2018 12:26:39 +0100 Subject: x86/CPU: Add a microcode loader callback Add a callback function which the microcode loader calls when microcode has been updated to a newer revision. Do the callback only when no error was encountered during loading. Tested-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Ashok Raj <ashok.raj@intel.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20180216112640.11554-3-bp@alien8.de Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 10 ++++++++++ arch/x86/kernel/cpu/microcode/core.c | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1bd9ed87606f..b0ccd4847a58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -977,4 +977,5 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); +void microcode_check(void); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 824aee0117bb..84f1cd88608b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1749,3 +1749,13 @@ static int __init init_cpu_syscore(void) return 0; } core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ + perf_check_microcode(); +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6fdaf7cf3182..aa1b9a422f2b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -509,6 +509,7 @@ static ssize_t reload_store(struct device *dev, const char *buf, size_t size) { enum ucode_state tmp_ret = UCODE_OK; + bool do_callback = false; unsigned long val; ssize_t ret = 0; int cpu; @@ -531,10 +532,13 @@ static ssize_t reload_store(struct device *dev, if (!ret) ret = -EINVAL; } + + if (tmp_ret == UCODE_UPDATED) + do_callback = true; } - if (!ret && tmp_ret == UCODE_UPDATED) - perf_check_microcode(); + if (!ret && do_callback) + microcode_check(); mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.2.3-70-g09d2 From 42ca8082e260dcfd8afa2afa6ec1940b9d41724c Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Fri, 16 Feb 2018 12:26:40 +0100 Subject: x86/CPU: Check CPU feature bits after microcode upgrade With some microcode upgrades, new CPUID features can become visible on the CPU. Check what the kernel has mirrored now and issue a warning hinting at possible things the user/admin can do to make use of the newly visible features. Originally-by: Ashok Raj <ashok.raj@intel.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Ashok Raj <ashok.raj@intel.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20180216112640.11554-4-bp@alien8.de Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 84f1cd88608b..348cf4821240 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1757,5 +1757,25 @@ core_initcall(init_cpu_syscore); */ void microcode_check(void) { + struct cpuinfo_x86 info; + perf_check_microcode(); + + /* Reload CPUID max function as it might've changed. */ + info.cpuid_level = cpuid_eax(0); + + /* + * Copy all capability leafs to pick up the synthetic ones so that + * memcmp() below doesn't fail on that. The ones coming from CPUID will + * get overwritten in get_cpu_cap(). + */ + memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + + get_cpu_cap(&info); + + if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) + return; + + pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); + pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); } -- cgit v1.2.3-70-g09d2 From 63e708f826bb21470155d37b103a75d8a9e25b18 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@redhat.com> Date: Wed, 7 Feb 2018 18:49:23 -0500 Subject: x86/xen: Calculate __max_logical_packages on PV domains The kernel panics on PV domains because native_smp_cpus_done() is only called for HVM domains. Calculate __max_logical_packages for PV domains. Fixes: b4c0a7326f5d ("x86/smpboot: Fix __max_logical_packages estimate") Signed-off-by: Prarit Bhargava <prarit@redhat.com> Tested-and-reported-by: Simon Gaiser <simon@invisiblethingslab.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: x86@kernel.org Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Juergen Gross <jgross@suse.com> Cc: Dou Liyang <douly.fnst@cn.fujitsu.com> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Kate Stewart <kstewart@linuxfoundation.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Vitaly Kuznetsov <vkuznets@redhat.com> Cc: xen-devel@lists.xenproject.org Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Signed-off-by: Juergen Gross <jgross@suse.com> --- arch/x86/include/asm/smp.h | 1 + arch/x86/kernel/smpboot.c | 10 ++++++++-- arch/x86/xen/smp.c | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 461f53d27708..a4189762b266 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); +void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index cfc61e1d45e2..9eee25d07586 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void) cpu_set_state_online(me); } -void __init native_smp_cpus_done(unsigned int max_cpus) +void __init calculate_max_logical_packages(void) { int ncpus; - pr_debug("Boot done\n"); /* * Today neither Intel nor AMD support heterogenous systems so * extrapolate the boot cpu's data to all packages. @@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus) ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); pr_info("Max logical packages: %u\n", __max_logical_packages); +} + +void __init native_smp_cpus_done(unsigned int max_cpus) +{ + pr_debug("Boot done\n"); + + calculate_max_logical_packages(); if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 77c959cf81e7..7a43b2ae19f1 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus) if (xen_hvm_domain()) native_smp_cpus_done(max_cpus); + else + calculate_max_logical_packages(); if (xen_have_vcpu_info_placement) return; -- cgit v1.2.3-70-g09d2 From 6262b6e78ce5ba62be47774ca80f5b0a6f0eb428 Mon Sep 17 00:00:00 2001 From: Jan Beulich <JBeulich@suse.com> Date: Mon, 19 Feb 2018 07:50:23 -0700 Subject: x86/IO-APIC: Avoid warning in 32-bit builds Constants wider than 32 bits should be tagged with ULL. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/5A8AF23F02000078001A91E5@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ad2e410974f..7c5538769f7e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void) do { rep_nop(); now = rdtsc(); - } while ((now - start) < 40000000000UL / HZ && + } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end)); } -- cgit v1.2.3-70-g09d2 From dd84441a797150dcc49298ec95c459a8891d8bb1 Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw@amazon.co.uk> Date: Mon, 19 Feb 2018 10:50:54 +0000 Subject: x86/speculation: Use IBRS if available before calling into firmware Retpoline means the kernel is safe because it has no indirect branches. But firmware isn't, so use IBRS for firmware calls if it's available. Block preemption while IBRS is set, although in practice the call sites already had to be doing that. Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware code, from an NMI handler. I don't want to touch that with a bargepole. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/apm.h | 6 ++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/efi.h | 17 ++++++++++++++-- arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++--------- arch/x86/kernel/cpu/bugs.c | 12 ++++++++++- 5 files changed, 63 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015ddcf26..c356098b6fb9 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,6 +7,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H +#include <asm/nospec-branch.h> + #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0dfe4d3f74e2..f41079da38c5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,7 @@ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85f6ccb80b91..a399c1ebf6f0 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -6,6 +6,7 @@ #include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/tlb.h> +#include <asm/nospec-branch.h> /* * We map the EFI regions needed for runtime services non-contiguously, @@ -36,8 +37,18 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); -#define arch_efi_call_virt_setup() kernel_fpu_begin() -#define arch_efi_call_virt_teardown() kernel_fpu_end() +#define arch_efi_call_virt_setup() \ +({ \ + kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + firmware_restrict_branch_speculation_end(); \ + kernel_fpu_end(); \ +}) + /* * Wrap all the virtual calls in a way that forces the parameters on the stack. @@ -73,6 +84,7 @@ struct efi_scratch { efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ \ if (efi_scratch.use_pgd) { \ efi_scratch.prev_cr3 = __read_cr3(); \ @@ -91,6 +103,7 @@ struct efi_scratch { __flush_tlb_all(); \ } \ \ + firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ preempt_enable(); \ }) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index af34b1e8069a..ec90c3228991 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,17 +219,38 @@ static inline void vmexit_fill_RSB(void) #endif } +#define alternative_msr_write(_msr, _val, _feature) \ + asm volatile(ALTERNATIVE("", \ + "movl %[msr], %%ecx\n\t" \ + "movl %[val], %%eax\n\t" \ + "movl $0, %%edx\n\t" \ + "wrmsr", \ + _feature) \ + : : [msr] "i" (_msr), [val] "i" (_val) \ + : "eax", "ecx", "edx", "memory") + static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, + X86_FEATURE_USE_IBPB); +} + +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + */ +static inline void firmware_restrict_branch_speculation_start(void) +{ + preempt_disable(); + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, + X86_FEATURE_USE_IBRS_FW); +} + +static inline void firmware_restrict_branch_speculation_end(void) +{ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, + X86_FEATURE_USE_IBRS_FW); + preempt_enable(); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d71c8b54b696..bfca937bdcc3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -300,6 +300,15 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } + + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. But firmware isn't, so use IBRS to protect that. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } } #undef pr_fmt @@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); } #endif -- cgit v1.2.3-70-g09d2 From bd89004f6305cbf7352238f61da093207ee518d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@infradead.org> Date: Tue, 16 Jan 2018 10:38:09 +0100 Subject: x86/boot, objtool: Annotate indirect jump in secondary_startup_64() The objtool retpoline validation found this indirect jump. Seeing how it's on CPU bringup before we run userspace it should be safe, annotate it. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> Acked-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/head_64.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f0fcda..0f545b3cf926 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -23,6 +23,7 @@ #include <asm/nops.h> #include "../entry/calling.h" #include <asm/export.h> +#include <asm/nospec-branch.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -134,6 +135,7 @@ ENTRY(secondary_startup_64) /* Ensure I am executing from virtual addresses */ movq $1f, %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: UNWIND_HINT_EMPTY -- cgit v1.2.3-70-g09d2 From 9fbcc57aa16424ef84cb54e0d9db3221763de88a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf <jpoimboe@redhat.com> Date: Tue, 20 Feb 2018 11:37:53 -0600 Subject: extable: Make init_kernel_text() global Convert init_kernel_text() to a global function and use it in a few places instead of manually comparing _sinittext and _einittext. Note that kallsyms.h has a very similar function called is_kernel_inittext(), but its end check is inclusive. I'm not sure whether that's intentional behavior, so I didn't touch it. Suggested-by: Jason Baron <jbaron@akamai.com> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Acked-by: Peter Zijlstra <peterz@infradead.org> Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Cc: Borislav Petkov <bp@suse.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/4335d02be8d45ca7d265d2f174251d0b7ee6c5fd.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/unwind_orc.c | 3 +-- include/linux/kernel.h | 1 + kernel/extable.c | 2 +- kernel/jump_label.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1f9188f5357c..feb28fee6cea 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -5,7 +5,6 @@ #include <asm/unwind.h> #include <asm/orc_types.h> #include <asm/orc_lookup.h> -#include <asm/sections.h> #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) @@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) + if (init_kernel_text(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ce51455e2adf..3fd291503576 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); +extern int init_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); diff --git a/kernel/extable.c b/kernel/extable.c index a17fdb63dc3e..6a5b61ebc66c 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -static inline int init_kernel_text(unsigned long addr) +int init_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_sinittext && addr < (unsigned long)_einittext) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b2f0b479191b..52a0a7af8640 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,7 +16,6 @@ #include <linux/jump_label_ratelimit.h> #include <linux/bug.h> #include <linux/cpu.h> -#include <asm/sections.h> #ifdef HAVE_JUMP_LABEL @@ -429,8 +428,7 @@ void __init jump_label_invalidate_init(void) struct jump_entry *iter; for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->code >= (unsigned long)_sinittext && - iter->code < (unsigned long)_einittext) + if (init_kernel_text(iter->code)) iter->code = 0; } } -- cgit v1.2.3-70-g09d2 From b21ebf2fb4cde1618915a97cc773e287ff49173e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.tools@gmail.com> Date: Wed, 7 Feb 2018 14:20:09 -0800 Subject: x86: Treat R_X86_64_PLT32 as R_X86_64_PC32 On i386, there are 2 types of PLTs, PIC and non-PIC. PIE and shared objects must use PIC PLT. To use PIC PLT, you need to load _GLOBAL_OFFSET_TABLE_ into EBX first. There is no need for that on x86-64 since x86-64 uses PC-relative PLT. On x86-64, for 32-bit PC-relative branches, we can generate PLT32 relocation, instead of PC32 relocation, which can also be used as a marker for 32-bit PC-relative branches. Linker can always reduce PLT32 relocation to PC32 if function is defined locally. Local functions should use PC32 relocation. As far as Linux kernel is concerned, R_X86_64_PLT32 can be treated the same as R_X86_64_PC32 since Linux kernel doesn't use PLT. R_X86_64_PLT32 for 32-bit PC-relative branches has been enabled in binutils master branch which will become binutils 2.31. [ hjl is working on having better documentation on this all, but a few more notes from him: "PLT32 relocation is used as marker for PC-relative branches. Because of EBX, it looks odd to generate PLT32 relocation on i386 when EBX doesn't have GOT. As for symbol resolution, PLT32 and PC32 relocations are almost interchangeable. But when linker sees PLT32 relocation against a protected symbol, it can resolved locally at link-time since it is used on a branch instruction. Linker can't do that for PC32 relocation" but for the kernel use, the two are basically the same, and this commit gets things building and working with the current binutils master - Linus ] Signed-off-by: H.J. Lu <hjl.tools@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/kernel/machine_kexec_64.c | 1 + arch/x86/kernel/module.c | 1 + arch/x86/tools/relocs.c | 3 +++ 3 files changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1f790cf9d38f..3b7427aa7d85 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index da0c160e5589..f58336af095c 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: if (*(u32 *)loc != 0) goto invalid_relocation; val -= (u64)loc; diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5d73c443e778..220e97841e49 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, break; case R_X86_64_PC32: + case R_X86_64_PLT32: /* * PC relative relocations don't need to be adjusted unless * referencing a percpu symbol. + * + * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ if (is_percpu_sym(sym, symname)) add_reloc(&relocs32neg, offset); -- cgit v1.2.3-70-g09d2 From e84cf6aa501c58bf4bf451f1e425192ec090aed2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <tglx@linutronix.de> Date: Thu, 22 Feb 2018 12:08:06 +0100 Subject: x86/apic/vector: Handle vector release on CPU unplug correctly When a irq vector is replaced, then the previous vector is normally released when the first interrupt happens on the new vector. If the target CPU of the previous vector is already offline when the new vector is installed, then the previous vector is silently discarded, which leads to accounting issues causing suspend failures and other problems. Adjust the logic so that the previous vector is freed in the underlying matrix allocator to ensure that the accounting stays correct. Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Reported-by: Yuriy Vostrikov <delamonpansie@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Yuriy Vostrikov <delamonpansie@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180222112316.930791749@linutronix.de Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/apic/vector.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3cc471beb50b..bb6f7a2148d7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, { struct apic_chip_data *apicd = apic_chip_data(irqd); struct irq_desc *desc = irq_data_to_desc(irqd); + bool managed = irqd_affinity_is_managed(irqd); lockdep_assert_held(&vector_lock); trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, apicd->cpu); - /* Setup the vector move, if required */ - if (apicd->vector && cpu_online(apicd->cpu)) { + /* + * If there is no vector associated or if the associated vector is + * the shutdown vector, which is associated to make PCI/MSI + * shutdown mode work, then there is nothing to release. Clear out + * prev_vector for this and the offlined target case. + */ + apicd->prev_vector = 0; + if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR) + goto setnew; + /* + * If the target CPU of the previous vector is online, then mark + * the vector as move in progress and store it for cleanup when the + * first interrupt on the new vector arrives. If the target CPU is + * offline then the regular release mechanism via the cleanup + * vector is not possible and the vector can be immediately freed + * in the underlying matrix allocator. + */ + if (cpu_online(apicd->cpu)) { apicd->move_in_progress = true; apicd->prev_vector = apicd->vector; apicd->prev_cpu = apicd->cpu; } else { - apicd->prev_vector = 0; + irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, + managed); } +setnew: apicd->vector = newvec; apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); -- cgit v1.2.3-70-g09d2 From 36e74d355297dde6e69a39c838d24710e442babe Mon Sep 17 00:00:00 2001 From: Wang Hui <john.wanghui@huawei.com> Date: Thu, 22 Feb 2018 19:26:03 -0800 Subject: x86/intel_rdt: Fix incorrect returned value when creating rdgroup sub-directory in resctrl file system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If no monitoring feature is detected because all monitoring features are disabled during boot time or there is no monitoring feature in hardware, creating rdtgroup sub-directory by "mkdir" command reports error: mkdir: cannot create directory ‘/sys/fs/resctrl/p1’: No such file or directory But the sub-directory actually is generated and content is correct: cpus cpus_list schemata tasks The error is because rdtgroup_mkdir_ctrl_mon() returns non zero value after the sub-directory is created and the returned value is reported as an error to user. Clear the returned value to report to user that the sub-directory is actually created successfully. Signed-off-by: Wang Hui <john.wanghui@huawei.com> Signed-off-by: Zhang Yanfei <yanfei.zhang@huawei.com> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi V Shankar <ravi.v.shankar@intel.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vikas <vikas.shivappa@intel.com> Cc: Xiaochen Shen <xiaochen.shen@intel.com> Link: http://lkml.kernel.org/r/1519356363-133085-1-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index bdab7d2f51af..fca759d272a1 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, goto out_common_fail; } closid = ret; + ret = 0; rdtgrp->closid = closid; list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); -- cgit v1.2.3-70-g09d2 From 4596749339e06dc7a424fc08a15eded850ed78b7 Mon Sep 17 00:00:00 2001 From: Samuel Neves <sneves@dei.uc.pt> Date: Wed, 21 Feb 2018 20:50:36 +0000 Subject: x86/topology: Update the 'cpu cores' field in /proc/cpuinfo correctly across CPU hotplug operations Without this fix, /proc/cpuinfo will display an incorrect amount of CPU cores, after bringing them offline and online again, as exemplified below: $ cat /proc/cpuinfo | grep cores cpu cores : 4 cpu cores : 8 cpu cores : 8 cpu cores : 20 cpu cores : 4 cpu cores : 3 cpu cores : 2 cpu cores : 2 This patch fixes this by always zeroing the booted_cores variable upon turning off a logical CPU. Tested-by: Dou Liyang <douly.fnst@cn.fujitsu.com> Signed-off-by: Samuel Neves <sneves@dei.uc.pt> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: jgross@suse.com Cc: luto@kernel.org Cc: prarit@redhat.com Cc: vkuznets@redhat.com Link: http://lkml.kernel.org/r/20180221205036.5244-1-sneves@dei.uc.pt Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/smpboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9eee25d07586..ff99e2b6fc54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1437,6 +1437,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); c->cpu_core_id = 0; + c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); } -- cgit v1.2.3-70-g09d2 From fe2a3027e74e40a3ece3a4c1e4e51403090a907a Mon Sep 17 00:00:00 2001 From: Radim Krčmář <rkrcmar@redhat.com> Date: Thu, 1 Feb 2018 22:16:21 +0100 Subject: KVM: x86: fix backward migration with async_PF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guests on new hypersiors might set KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT bit when enabling async_PF, but this bit is reserved on old hypervisors, which results in a failure upon migration. To avoid breaking different cases, we are checking for CPUID feature bit before enabling the feature and nothing else. Fixes: 52a5c155cf79 ("KVM: async_pf: Let guest support delivery of async_pf from guest mode") Cc: <stable@vger.kernel.org> Reviewed-by: Wanpeng Li <wanpengli@tencent.com> Reviewed-by: David Hildenbrand <david@redhat.com> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- Documentation/virtual/kvm/cpuid.txt | 4 ++++ Documentation/virtual/kvm/msr.txt | 3 ++- arch/x86/include/uapi/asm/kvm_para.h | 1 + arch/x86/kernel/kvm.c | 8 ++++---- arch/x86/kvm/cpuid.c | 3 ++- 5 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index dcab6dc11e3b..87a7506f31c2 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit || || before enabling paravirtualized || || tlb flush. ------------------------------------------------------------------------------ +KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit + || || can be enabled by setting bit 2 + || || when writing to msr 0x4b564d02 +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index 1ebecc115dc6..f3f0d57ced8e 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 when asynchronous page faults are enabled on the vcpu 0 when disabled. Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults - are delivered to L1 as #PF vmexits. + are delivered to L1 as #PF vmexits. Bit 2 can be set only if + KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID. First 4 byte of 64 byte memory location will be written to by the hypervisor at the time of asynchronous page fault (APF) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 7a2ade4aa235..6cfa9c8cb7d6 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -26,6 +26,7 @@ #define KVM_FEATURE_PV_EOI 6 #define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_TLB_FLUSH 9 +#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4e37d1a851a6..971babe964d2 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void) #endif pa |= KVM_ASYNC_PF_ENABLED; - /* Async page fault support for L1 hypervisor is optional */ - if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, - (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) + pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; + + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a0c5a69bc7c4..b671fc2d0422 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | - (1 << KVM_FEATURE_PV_TLB_FLUSH); + (1 << KVM_FEATURE_PV_TLB_FLUSH) | + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); -- cgit v1.2.3-70-g09d2 From afdc3f588850a6fbc996205ee2d472eb4426afb3 Mon Sep 17 00:00:00 2001 From: Dou Liyang <douly.fnst@cn.fujitsu.com> Date: Wed, 17 Jan 2018 11:46:54 +0800 Subject: x86/kvm: Make parse_no_xxx __init for kvm The early_param() is only called during kernel initialization, So Linux marks the functions of it with __init macro to save memory. But it forgot to mark the parse_no_kvmapf/stealacc/kvmclock_vsyscall, So, Make them __init as well. Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: rkrcmar@redhat.com Cc: kvm@vger.kernel.org Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Juergen Gross <jgross@suse.com> Cc: x86@kernel.org Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kernel/kvm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 971babe964d2..ee7d5c951864 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -49,7 +49,7 @@ static int kvmapf = 1; -static int parse_no_kvmapf(char *arg) +static int __init parse_no_kvmapf(char *arg) { kvmapf = 0; return 0; @@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg) early_param("no-kvmapf", parse_no_kvmapf); static int steal_acc = 1; -static int parse_no_stealacc(char *arg) +static int __init parse_no_stealacc(char *arg) { steal_acc = 0; return 0; @@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); static int kvmclock_vsyscall = 1; -static int parse_no_kvmclock_vsyscall(char *arg) +static int __init parse_no_kvmclock_vsyscall(char *arg) { kvmclock_vsyscall = 0; return 0; -- cgit v1.2.3-70-g09d2 From 4f2f61fc507176edd65826fbedc8987dea29b9d5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li <wanpengli@tencent.com> Date: Sun, 4 Feb 2018 22:57:58 -0800 Subject: KVM: X86: Avoid traversing all the cpus for pv tlb flush when steal time is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid traversing all the cpus for pv tlb flush when steal time is disabled since pv tlb flush depends on the field in steal time for shared data. Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Radim KrÄmář <rkrcmar@redhat.com> Signed-off-by: Wanpeng Li <wanpengli@tencent.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kernel/kvm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index ee7d5c951864..bc1a27280c4b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -545,7 +545,8 @@ static void __init kvm_guest_init(void) pv_time_ops.steal_clock = kvm_steal_clock; } - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void) { int cpu; - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); -- cgit v1.2.3-70-g09d2 From 945fd17ab6bab8a4d05da6c3170519fbcfe62ddb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <tglx@linutronix.de> Date: Wed, 28 Feb 2018 21:14:26 +0100 Subject: x86/cpu_entry_area: Sync cpu_entry_area to initial_page_table The separation of the cpu_entry_area from the fixmap missed the fact that on 32bit non-PAE kernels the cpu_entry_area mapping might not be covered in initial_page_table by the previous synchronizations. This results in suspend/resume failures because 32bit utilizes initial page table for resume. The absence of the cpu_entry_area mapping results in a triple fault, aka. insta reboot. With PAE enabled this works by chance because the PGD entry which covers the fixmap and other parts incindentally provides the cpu_entry_area mapping as well. Synchronize the initial page table after setting up the cpu entry area. Instead of adding yet another copy of the same code, move it to a function and invoke it from the various places. It needs to be investigated if the existing calls in setup_arch() and setup_per_cpu_areas() can be replaced by the later invocation from setup_cpu_entry_areas(), but that's beyond the scope of this fix. Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Reported-by: Woody Suwalski <terraluna977@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Woody Suwalski <terraluna977@gmail.com> Cc: William Grant <william.grant@canonical.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1802282137290.1392@nanos.tec.linutronix.de --- arch/x86/include/asm/pgtable_32.h | 1 + arch/x86/include/asm/pgtable_64.h | 1 + arch/x86/kernel/setup.c | 17 +++++------------ arch/x86/kernel/setup_percpu.c | 17 ++++------------- arch/x86/mm/cpu_entry_area.c | 6 ++++++ arch/x86/mm/init_32.c | 15 +++++++++++++++ 6 files changed, 32 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e55466760ff8..b3ec519e3982 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +void sync_initial_page_table(void); /* * Define this if things work differently on an i386 and an i486: diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9a34f6..1149d2112b2e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt extern void paging_init(void); +static inline void sync_initial_page_table(void) { } #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %p(%016lx)\n", \ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1ae67e982af7..4c616be28506 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p) kasan_init(); -#ifdef CONFIG_X86_32 - /* sync back kernel address range */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. + * Sync back kernel address range. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); tboot_probe(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 497aa766fab3..ea554f812ee1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); -#ifdef CONFIG_X86_32 /* * Sync back kernel address range again. We already did this in * setup_arch(), but percpu data also needs to be available in * the smpboot asm. We can't reliably pick up percpu mappings * using vmalloc_fault(), because exception dispatch needs * percpu data. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. - */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); } diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index b9283cc27622..476d810639a8 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void) for_each_possible_cpu(cpu) setup_cpu_entry_area(cpu); + + /* + * This is the last essential update to swapper_pgdir which needs + * to be synchronized to initial_page_table on 32bit. + */ + sync_initial_page_table(); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 79cb066f40c0..396e1f0151ac 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ +void __init sync_initial_page_table(void) +{ + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +} + void __init native_pagetable_init(void) { unsigned long pfn, va; -- cgit v1.2.3-70-g09d2 From f6a015498dcaee72f80283cb7873d88deb07129c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Tue, 6 Mar 2018 00:29:17 -0600 Subject: signal/x86: Include the field offsets in the build time checks Due to an oversight when refactoring siginfo_t si_pkey has been in the wrong position since 4.16-rc1. Add an explicit check of the offset of every user space field in siginfo_t and compat_siginfo_t to make a mistake like this hard to make in the future. I have run this code on 4.15 and 4.16-rc1 with the position of si_pkey fixed and all of the fields show up in the same location. Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- arch/x86/kernel/signal_compat.c | 65 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index ac057f9b0763..0d930d8987cc 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); #define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0); + BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4); + BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8); + + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code) != 8); /* * Ensure that the size of each si_field never changes. * If it does, it is a sign that the @@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void) CHECK_CSI_SIZE (_kill, 2*sizeof(int)); CHECK_SI_SIZE (_kill, 2*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + CHECK_CSI_OFFSET(_timer); CHECK_CSI_SIZE (_timer, 3*sizeof(int)); CHECK_SI_SIZE (_timer, 6*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14); + CHECK_CSI_OFFSET(_rt); CHECK_CSI_SIZE (_rt, 3*sizeof(int)); CHECK_SI_SIZE (_rt, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14); + CHECK_CSI_OFFSET(_sigchld); CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x20); + BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x28); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime) != 0x1C); + #ifdef CONFIG_X86_X32_ABI CHECK_CSI_OFFSET(_sigchld_x32); CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); /* no _sigchld_x32 in the generic siginfo_t */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) != 0x20); #endif CHECK_CSI_OFFSET(_sigfault); CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); + + BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20); + BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18); + + BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); + CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd) != 0x10); + CHECK_CSI_OFFSET(_sigsys); CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x1C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch) != 0x14); + /* any new si_fields should be added here */ } -- cgit v1.2.3-70-g09d2 From 7c2178c1ff482679fb0ca0b628f720a888814548 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski <linux@dominikbrodowski.net> Date: Tue, 6 Mar 2018 22:18:05 +0100 Subject: x86/syscalls: Use proper syscall definition for sys_ioperm() Using SYSCALL_DEFINEx() is recommended, so use it also here. Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/ioport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 2f723301eb58..38deafebb21b 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -23,7 +23,7 @@ /* * this changes the io permissions bitmap in the current task. */ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) { struct thread_struct *t = ¤t->thread; struct tss_struct *tss; -- cgit v1.2.3-70-g09d2 From 36268223c1e9981d6cfc33aff8520b3bde4b8114 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Date: Mon, 26 Feb 2018 09:35:01 -0500 Subject: x86/spectre_v2: Don't check microcode versions when running under hypervisors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As: 1) It's known that hypervisors lie about the environment anyhow (host mismatch) 2) Even if the hypervisor (Xen, KVM, VMWare, etc) provided a valid "correct" value, it all gets to be very murky when migration happens (do you provide the "new" microcode of the machine?). And in reality the cloud vendors are the ones that should make sure that the microcode that is running is correct and we should just sing lalalala and trust them. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Cc: Wanpeng Li <kernellwp@gmail.com> Cc: kvm <kvm@vger.kernel.org> Cc: Krčmář <rkrcmar@redhat.com> Cc: Borislav Petkov <bp@alien8.de> CC: "H. Peter Anvin" <hpa@zytor.com> CC: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180226213019.GE9497@char.us.oracle.com --- arch/x86/kernel/cpu/intel.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d19e903214b4..4aa9fd379390 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -144,6 +144,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) { int i; + /* + * We know that the hypervisor lie to us on the microcode version so + * we may as well hope that it is running the correct version. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_stepping == spectre_bad_microcodes[i].stepping) -- cgit v1.2.3-70-g09d2 From 854857f5944c59a881ff607b37ed9ed41d031a3b Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Wed, 28 Feb 2018 11:28:40 +0100 Subject: x86/microcode: Get rid of struct apply_microcode_ctx It is a useless remnant from earlier times. Use the ucode_state enum directly. No functional change. Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> Link: https://lkml.kernel.org/r/20180228102846.13447-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index aa1b9a422f2b..63370651e376 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -373,26 +373,23 @@ static int collect_cpu_info(int cpu) return ret; } -struct apply_microcode_ctx { - enum ucode_state err; -}; - static void apply_microcode_local(void *arg) { - struct apply_microcode_ctx *ctx = arg; + enum ucode_state *err = arg; - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); + *err = microcode_ops->apply_microcode(smp_processor_id()); } static int apply_microcode_on_target(int cpu) { - struct apply_microcode_ctx ctx = { .err = 0 }; + enum ucode_state err; int ret; - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); - if (!ret) - ret = ctx.err; - + ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1); + if (!ret) { + if (err == UCODE_ERROR) + ret = 1; + } return ret; } -- cgit v1.2.3-70-g09d2 From c182d2b7d0ca48e0d6ff16f7d883161238c447ed Mon Sep 17 00:00:00 2001 From: Ashok Raj <ashok.raj@intel.com> Date: Wed, 28 Feb 2018 11:28:41 +0100 Subject: x86/microcode/intel: Check microcode revision before updating sibling threads After updating microcode on one of the threads of a core, the other thread sibling automatically gets the update since the microcode resources on a hyperthreaded core are shared between the two threads. Check the microcode revision on the CPU before performing a microcode update and thus save us the WRMSR 0x79 because it is a particularly expensive operation. [ Borislav: Massage changelog and coding style. ] Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> Link: http://lkml.kernel.org/r/1519352533-15992-2-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-3-bp@alien8.de --- arch/x86/kernel/cpu/microcode/intel.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 923054a6b760..87bd6dc94081 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -589,6 +589,17 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (!mc) return 0; + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + return UCODE_OK; + } + /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -776,7 +787,7 @@ static enum ucode_state apply_microcode_intel(int cpu) { struct microcode_intel *mc; struct ucode_cpu_info *uci; - struct cpuinfo_x86 *c; + struct cpuinfo_x86 *c = &cpu_data(cpu); static int prev_rev; u32 rev; @@ -793,6 +804,18 @@ static enum ucode_state apply_microcode_intel(int cpu) return UCODE_NFOUND; } + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + c->microcode = rev; + return UCODE_OK; + } + /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -813,8 +836,6 @@ static enum ucode_state apply_microcode_intel(int cpu) prev_rev = rev; } - c = &cpu_data(cpu); - uci->cpu_sig.rev = rev; c->microcode = rev; -- cgit v1.2.3-70-g09d2 From 91df9fdf51492aec9fed6b4cbd33160886740f47 Mon Sep 17 00:00:00 2001 From: Ashok Raj <ashok.raj@intel.com> Date: Wed, 28 Feb 2018 11:28:42 +0100 Subject: x86/microcode/intel: Writeback and invalidate caches before updating microcode Updating microcode is less error prone when caches have been flushed and depending on what exactly the microcode is updating. For example, some of the issues around certain Broadwell parts can be addressed by doing a full cache flush. [ Borislav: Massage it and use native_wbinvd() in both cases. ] Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> Link: http://lkml.kernel.org/r/1519352533-15992-3-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-4-bp@alien8.de --- arch/x86/kernel/cpu/microcode/intel.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 87bd6dc94081..e2864bc2d575 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -600,6 +600,12 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) return UCODE_OK; } + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -816,6 +822,12 @@ static enum ucode_state apply_microcode_intel(int cpu) return UCODE_OK; } + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); -- cgit v1.2.3-70-g09d2 From 30ec26da9967d0d785abc24073129a34c3211777 Mon Sep 17 00:00:00 2001 From: Ashok Raj <ashok.raj@intel.com> Date: Wed, 28 Feb 2018 11:28:43 +0100 Subject: x86/microcode: Do not upload microcode if CPUs are offline Avoid loading microcode if any of the CPUs are offline, and issue a warning. Having different microcode revisions on the system at any time is outright dangerous. [ Borislav: Massage changelog. ] Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> Link: http://lkml.kernel.org/r/1519352533-15992-4-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-5-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 63370651e376..fa32cb3dcca5 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -486,6 +486,16 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; +static int check_online_cpus(void) +{ + if (num_online_cpus() == num_present_cpus()) + return 0; + + pr_err("Not all CPUs online, aborting microcode update.\n"); + + return -EINVAL; +} + static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -519,7 +529,13 @@ static ssize_t reload_store(struct device *dev, return size; get_online_cpus(); + + ret = check_online_cpus(); + if (ret) + goto put; + mutex_lock(µcode_mutex); + for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); if (tmp_ret > UCODE_NFOUND) { @@ -538,6 +554,8 @@ static ssize_t reload_store(struct device *dev, microcode_check(); mutex_unlock(µcode_mutex); + +put: put_online_cpus(); if (!ret) -- cgit v1.2.3-70-g09d2 From d8c3b52c00a05036e0a6b315b4b17921a7b67997 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Wed, 28 Feb 2018 11:28:44 +0100 Subject: x86/microcode/intel: Look into the patch cache first The cache might contain a newer patch - look in there first. A follow-on change will make sure newest patches are loaded into the cache of microcode patches. Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> Link: https://lkml.kernel.org/r/20180228102846.13447-6-bp@alien8.de --- arch/x86/kernel/cpu/microcode/intel.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index e2864bc2d575..2aded9db1d42 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -791,9 +791,9 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) static enum ucode_state apply_microcode_intel(int cpu) { - struct microcode_intel *mc; - struct ucode_cpu_info *uci; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_intel *mc; static int prev_rev; u32 rev; @@ -801,11 +801,10 @@ static enum ucode_state apply_microcode_intel(int cpu) if (WARN_ON(raw_smp_processor_id() != cpu)) return UCODE_ERROR; - uci = ucode_cpu_info + cpu; - mc = uci->mc; + /* Look for a newer patch in our cache: */ + mc = find_patch(uci); if (!mc) { - /* Look for a newer patch in our cache: */ - mc = find_patch(uci); + mc = uci->mc; if (!mc) return UCODE_NFOUND; } -- cgit v1.2.3-70-g09d2 From cfb52a5a09c8ae3a1dafb44ce549fde5b69e8117 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Wed, 28 Feb 2018 11:28:45 +0100 Subject: x86/microcode: Request microcode on the BSP ... so that any newer version can land in the cache and can later be fished out by the application functions. Do that before grabbing the hotplug lock. Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> Link: https://lkml.kernel.org/r/20180228102846.13447-7-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index fa32cb3dcca5..5dd157d48606 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -499,15 +499,10 @@ static int check_online_cpus(void) static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; if (!uci->valid) return UCODE_OK; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate != UCODE_OK) - return ustate; - return apply_microcode_on_target(cpu); } @@ -515,11 +510,11 @@ static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + int cpu, bsp = boot_cpu_data.cpu_index; enum ucode_state tmp_ret = UCODE_OK; bool do_callback = false; unsigned long val; ssize_t ret = 0; - int cpu; ret = kstrtoul(buf, 0, &val); if (ret) @@ -528,6 +523,10 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; + tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); + if (tmp_ret != UCODE_OK) + return size; + get_online_cpus(); ret = check_online_cpus(); -- cgit v1.2.3-70-g09d2 From a5321aec6412b20b5ad15db2d6b916c05349dbff Mon Sep 17 00:00:00 2001 From: Ashok Raj <ashok.raj@intel.com> Date: Wed, 28 Feb 2018 11:28:46 +0100 Subject: x86/microcode: Synchronize late microcode loading Original idea by Ashok, completely rewritten by Borislav. Before you read any further: the early loading method is still the preferred one and you should always do that. The following patch is improving the late loading mechanism for long running jobs and cloud use cases. Gather all cores and serialize the microcode update on them by doing it one-by-one to make the late update process as reliable as possible and avoid potential issues caused by the microcode update. [ Borislav: Rewrite completely. ] Co-developed-by: Borislav Petkov <bp@suse.de> Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> Link: https://lkml.kernel.org/r/20180228102846.13447-8-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 118 +++++++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5dd157d48606..70ecbc8099c9 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -22,13 +22,16 @@ #define pr_fmt(fmt) "microcode: " fmt #include <linux/platform_device.h> +#include <linux/stop_machine.h> #include <linux/syscore_ops.h> #include <linux/miscdevice.h> #include <linux/capability.h> #include <linux/firmware.h> #include <linux/kernel.h> +#include <linux/delay.h> #include <linux/mutex.h> #include <linux/cpu.h> +#include <linux/nmi.h> #include <linux/fs.h> #include <linux/mm.h> @@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache); */ static DEFINE_MUTEX(microcode_mutex); +/* + * Serialize late loading so that CPUs get updated one-by-one. + */ +static DEFINE_SPINLOCK(update_lock); + struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -486,6 +494,19 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; +/* + * Late loading dance. Why the heavy-handed stomp_machine effort? + * + * - HT siblings must be idle and not execute other code while the other sibling + * is loading microcode in order to avoid any negative interactions caused by + * the loading. + * + * - In addition, microcode update on the cores must be serialized until this + * requirement can be relaxed in the future. Right now, this is conservative + * and good. + */ +#define SPINUNIT 100 /* 100 nsec */ + static int check_online_cpus(void) { if (num_online_cpus() == num_present_cpus()) @@ -496,23 +517,85 @@ static int check_online_cpus(void) return -EINVAL; } -static enum ucode_state reload_for_cpu(int cpu) +static atomic_t late_cpus; + +/* + * Returns: + * < 0 - on error + * 0 - no update done + * 1 - microcode was updated + */ +static int __reload_late(void *info) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + unsigned int timeout = NSEC_PER_SEC; + int all_cpus = num_online_cpus(); + int cpu = smp_processor_id(); + enum ucode_state err; + int ret = 0; - if (!uci->valid) - return UCODE_OK; + atomic_dec(&late_cpus); + + /* + * Wait for all CPUs to arrive. A load will not be attempted unless all + * CPUs show up. + * */ + while (atomic_read(&late_cpus)) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + atomic_read(&late_cpus)); + return -1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + + spin_lock(&update_lock); + apply_microcode_local(&err); + spin_unlock(&update_lock); + + if (err > UCODE_NFOUND) { + pr_warn("Error reloading microcode on CPU %d\n", cpu); + ret = -1; + } else if (err == UCODE_UPDATED) { + ret = 1; + } - return apply_microcode_on_target(cpu); + atomic_inc(&late_cpus); + + while (atomic_read(&late_cpus) != all_cpus) + cpu_relax(); + + return ret; +} + +/* + * Reload microcode late on all CPUs. Wait for a sec until they + * all gather together. + */ +static int microcode_reload_late(void) +{ + int ret; + + atomic_set(&late_cpus, num_online_cpus()); + + ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); + if (ret < 0) + return ret; + else if (ret > 0) + microcode_check(); + + return ret; } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { - int cpu, bsp = boot_cpu_data.cpu_index; enum ucode_state tmp_ret = UCODE_OK; - bool do_callback = false; + int bsp = boot_cpu_data.cpu_index; unsigned long val; ssize_t ret = 0; @@ -534,30 +617,13 @@ static ssize_t reload_store(struct device *dev, goto put; mutex_lock(µcode_mutex); - - for_each_online_cpu(cpu) { - tmp_ret = reload_for_cpu(cpu); - if (tmp_ret > UCODE_NFOUND) { - pr_warn("Error reloading microcode on CPU %d\n", cpu); - - /* set retval for the first encountered reload error */ - if (!ret) - ret = -EINVAL; - } - - if (tmp_ret == UCODE_UPDATED) - do_callback = true; - } - - if (!ret && do_callback) - microcode_check(); - + ret = microcode_reload_late(); mutex_unlock(µcode_mutex); put: put_online_cpus(); - if (!ret) + if (ret >= 0) ret = size; return ret; -- cgit v1.2.3-70-g09d2 From fa94d0c6e0f3431523f5701084d799c77c7d4a4f Mon Sep 17 00:00:00 2001 From: Tony Luck <tony.luck@intel.com> Date: Tue, 6 Mar 2018 15:21:41 +0100 Subject: x86/MCE: Save microcode revision in machine check records Updating microcode used to be relatively rare. Now that it has become more common we should save the microcode version in a machine check record to make sure that those people looking at the error have this important information bundled with the rest of the logged information. [ Borislav: Simplify a bit. ] Signed-off-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Yazen Ghannam <yazen.ghannam@amd.com> Cc: linux-edac <linux-edac@vger.kernel.org> Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180301233449.24311-1-tony.luck@intel.com --- arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 91723461dc1f..435db58a7bad 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -30,6 +30,7 @@ struct mce { __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ + __u32 microcode;/* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ff94d1e2dce..b3323cab9139 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -130,6 +130,8 @@ void mce_setup(struct mce *m) if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) rdmsrl(MSR_PPIN, m->ppin); + + m->microcode = boot_cpu_data.microcode; } DEFINE_PER_CPU(struct mce, injectm); @@ -262,7 +264,7 @@ static void __print_mce(struct mce *m) */ pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, - cpu_data(m->extcpu).microcode); + m->microcode); } static void print_mce(struct mce *m) -- cgit v1.2.3-70-g09d2 From b3b7c4795ccab5be71f080774c45bbbcc75c2aaf Mon Sep 17 00:00:00 2001 From: Seunghun Han <kkamagui@gmail.com> Date: Tue, 6 Mar 2018 15:21:43 +0100 Subject: x86/MCE: Serialize sysfs changes The check_interval file in /sys/devices/system/machinecheck/machinecheck<cpu number> directory is a global timer value for MCE polling. If it is changed by one CPU, mce_restart() broadcasts the event to other CPUs to delete and restart the MCE polling timer and __mcheck_cpu_init_timer() reinitializes the mce_timer variable. If more than one CPU writes a specific value to the check_interval file concurrently, mce_timer is not protected from such concurrent accesses and all kinds of explosions happen. Since only root can write to those sysfs variables, the issue is not a big deal security-wise. However, concurrent writes to these configuration variables is void of reason so the proper thing to do is to serialize the access with a mutex. Boris: - Make store_int_with_restart() use device_store_ulong() to filter out negative intervals - Limit min interval to 1 second - Correct locking - Massage commit message Signed-off-by: Seunghun Han <kkamagui@gmail.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Tony Luck <tony.luck@intel.com> Cc: linux-edac <linux-edac@vger.kernel.org> Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180302202706.9434-1-kkamagui@gmail.com --- arch/x86/kernel/cpu/mcheck/mce.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b3323cab9139..466f47301334 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -56,6 +56,9 @@ static DEFINE_MUTEX(mce_log_mutex); +/* sysfs synchronization */ +static DEFINE_MUTEX(mce_sysfs_mutex); + #define CREATE_TRACE_POINTS #include <trace/events/mce.h> @@ -2088,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.ignore_ce ^ !!new) { if (new) { /* disable ce features */ @@ -2100,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s, on_each_cpu(mce_enable_ce, (void *)1, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2112,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.cmci_disabled ^ !!new) { if (new) { /* disable cmci */ @@ -2123,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s, on_each_cpu(mce_enable_ce, NULL, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2130,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { - ssize_t ret = device_store_int(s, attr, buf, size); + unsigned long old_check_interval = check_interval; + ssize_t ret = device_store_ulong(s, attr, buf, size); + + if (check_interval == old_check_interval) + return ret; + + if (check_interval < 1) + check_interval = 1; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); + return ret; } -- cgit v1.2.3-70-g09d2 From c07a8f8b08ba683ea24f3ac9159f37ae94daf47f Mon Sep 17 00:00:00 2001 From: Francis Deslauriers <francis.deslauriers@efficios.com> Date: Thu, 8 Mar 2018 22:18:12 -0500 Subject: x86/kprobes: Fix kernel crash when probing .entry_trampoline code Disable the kprobe probing of the entry trampoline: .entry_trampoline is a code area that is used to ensure page table isolation between userspace and kernelspace. At the beginning of the execution of the trampoline, we load the kernel's CR3 register. This has the effect of enabling the translation of the kernel virtual addresses to physical addresses. Before this happens most kernel addresses can not be translated because the running process' CR3 is still used. If a kprobe is placed on the trampoline code before that change of the CR3 register happens the kernel crashes because int3 handling pages are not accessible. To fix this, add the .entry_trampoline section to the kprobe blacklist to prohibit the probing of code before all the kernel pages are accessible. Signed-off-by: Francis Deslauriers <francis.deslauriers@efficios.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: mathieu.desnoyers@efficios.com Cc: mhiramat@kernel.org Link: http://lkml.kernel.org/r/1520565492-4637-2-git-send-email-francis.deslauriers@efficios.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/sections.h | 1 + arch/x86/kernel/kprobes/core.c | 10 +++++++++- arch/x86/kernel/vmlinux.lds.S | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index d6baf23782bc..5c019d23d06b 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; +extern char __entry_trampoline_start[], __entry_trampoline_end[]; #endif #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd36f3c33cd0..0715f827607c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler); bool arch_within_kprobe_blacklist(unsigned long addr) { + bool is_in_entry_trampoline_section = false; + +#ifdef CONFIG_X86_64 + is_in_entry_trampoline_section = + (addr >= (unsigned long)__entry_trampoline_start && + addr < (unsigned long)__entry_trampoline_end); +#endif return (addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && - addr < (unsigned long)__entry_text_end); + addr < (unsigned long)__entry_text_end) || + is_in_entry_trampoline_section; } int __init arch_init_kprobes(void) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9b138a06c1a4..b854ebf5851b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS #ifdef CONFIG_X86_64 . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_start) = .; _entry_trampoline = .; *(.entry_trampoline) . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_end) = .; ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif -- cgit v1.2.3-70-g09d2 From b5069782453459f6ec1fdeb495d9901a4545fcb5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@kernel.org> Date: Tue, 13 Mar 2018 22:03:12 -0700 Subject: x86/vm86/32: Fix POPF emulation POPF would trap if VIP was set regardless of whether IF was set. Fix it. Suggested-by: Stas Sergeev <stsp@list.ru> Reported-by: Bart Oldeman <bartoldeman@gmail.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: stable@vger.kernel.org Fixes: 5ed92a8ab71f ("x86/vm86: Use the normal pt_regs area for vm86") Link: http://lkml.kernel.org/r/ce95f40556e7b2178b6bc06ee9557827ff94bd28.1521003603.git.luto@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/vm86_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5edb27f1a2c4..9d0b5af7db91 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) return; check_vip: - if (VEFLAGS & X86_EFLAGS_VIP) { + if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) == + (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) { save_v86_state(regs, VM86_STI); return; } -- cgit v1.2.3-70-g09d2 From e3b3121fa8da94cb20f9e0c64ab7981ae47fd085 Mon Sep 17 00:00:00 2001 From: Alexander Sergeyev <sergeev917@gmail.com> Date: Tue, 13 Mar 2018 22:38:56 +0300 Subject: x86/speculation: Remove Skylake C2 from Speculation Control microcode blacklist In accordance with Intel's microcode revision guidance from March 6 MCU rev 0xc2 is cleared on both Skylake H/S and Skylake Xeon E3 processors that share CPUID 506E3. Signed-off-by: Alexander Sergeyev <sergeev917@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Jia Zhang <qianyue.zj@alibaba-inc.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Kyle Huey <me@kylehuey.com> Cc: David Woodhouse <dwmw@amazon.co.uk> Link: https://lkml.kernel.org/r/20180313193856.GA8580@localhost.localdomain --- arch/x86/kernel/cpu/intel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4aa9fd379390..c3af167d0a70 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) /* * Early microcode releases for the Spectre v2 mitigation were broken. * Information taken from; - * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf * - https://kb.vmware.com/s/article/52345 * - Microcode revisions observed in the wild * - Release note from 20180108 microcode release @@ -123,7 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, -- cgit v1.2.3-70-g09d2 From 2613f36ed965d0e5a595a1d931fd3b480e82d6fd Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Wed, 14 Mar 2018 19:36:14 +0100 Subject: x86/microcode: Attempt late loading only when new microcode is present Return UCODE_NEW from the scanning functions to denote that new microcode was found and only then attempt the expensive synchronization dance. Reported-by: Emanuel Czirai <xftroxgpx@protonmail.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Emanuel Czirai <xftroxgpx@protonmail.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Link: https://lkml.kernel.org/r/20180314183615.17629-1-bp@alien8.de --- arch/x86/include/asm/microcode.h | 1 + arch/x86/kernel/cpu/microcode/amd.c | 34 +++++++++++++++++++++------------- arch/x86/kernel/cpu/microcode/core.c | 8 +++----- arch/x86/kernel/cpu/microcode/intel.c | 4 +++- 4 files changed, 28 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 7fb1047d61c7..6cf0e4cb7b97 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -39,6 +39,7 @@ struct device; enum ucode_state { UCODE_OK = 0, + UCODE_NEW, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR, diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index a998e1a7d46f..48179928ff38 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) return -EINVAL; ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret != UCODE_OK) + if (ret > UCODE_UPDATED) return -EINVAL; return 0; @@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, static enum ucode_state load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { + struct ucode_patch *p; enum ucode_state ret; /* free old equiv table */ free_equiv_cpu_table(); ret = __load_microcode_amd(family, data, size); - - if (ret != UCODE_OK) + if (ret != UCODE_OK) { cleanup(); + return ret; + } -#ifdef CONFIG_X86_32 - /* save BSP's matching patch for early load */ - if (save) { - struct ucode_patch *p = find_patch(0); - if (p) { - memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), - PATCH_MAX_SIZE)); - } + p = find_patch(0); + if (!p) { + return ret; + } else { + if (boot_cpu_data.microcode == p->patch_id) + return ret; + + ret = UCODE_NEW; } -#endif + + /* save BSP's matching patch for early load */ + if (!save) + return ret; + + memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); + memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); + return ret; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 70ecbc8099c9..9f0fe5bb450d 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -607,7 +607,7 @@ static ssize_t reload_store(struct device *dev, return size; tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); - if (tmp_ret != UCODE_OK) + if (tmp_ret != UCODE_NEW) return size; get_online_cpus(); @@ -691,10 +691,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, - refresh_fw); - - if (ustate == UCODE_OK) { + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, refresh_fw); + if (ustate == UCODE_NEW) { pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 2aded9db1d42..32b8e5724f96 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -862,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, unsigned int leftover = size; unsigned int curr_mc_size = 0, new_mc_size = 0; unsigned int csig, cpf; + enum ucode_state ret = UCODE_OK; while (leftover) { struct microcode_header_intel mc_header; @@ -903,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, new_mc = mc; new_mc_size = mc_size; mc = NULL; /* trigger new vmalloc */ + ret = UCODE_NEW; } ucode_ptr += mc_size; @@ -932,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); - return UCODE_OK; + return ret; } static int get_ucode_fw(void *to, const void *from, size_t n) -- cgit v1.2.3-70-g09d2 From bb8c13d61a629276a162c1d2b1a20a815cbcfbb7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Wed, 14 Mar 2018 19:36:15 +0100 Subject: x86/microcode: Fix CPU synchronization routine Emanuel reported an issue with a hang during microcode update because my dumb idea to use one atomic synchronization variable for both rendezvous - before and after update - was simply bollocks: microcode: microcode_reload_late: late_cpus: 4 microcode: __reload_late: cpu 2 entered microcode: __reload_late: cpu 1 entered microcode: __reload_late: cpu 3 entered microcode: __reload_late: cpu 0 entered microcode: __reload_late: cpu 1 left microcode: Timeout while waiting for CPUs rendezvous, remaining: 1 CPU1 above would finish, leave and the others will still spin waiting for it to join. So do two synchronization atomics instead, which makes the code a lot more straightforward. Also, since the update is serialized and it also takes quite some time per microcode engine, increase the exit timeout by the number of CPUs on the system. That's ok because the moment all CPUs are done, that timeout will be cut short. Furthermore, panic when some of the CPUs timeout when returning from a microcode update: we can't allow a system with not all cores updated. Also, as an optimization, do not do the exit sync if microcode wasn't updated. Reported-by: Emanuel Czirai <xftroxgpx@protonmail.com> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Emanuel Czirai <xftroxgpx@protonmail.com> Tested-by: Ashok Raj <ashok.raj@intel.com> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Link: https://lkml.kernel.org/r/20180314183615.17629-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 68 ++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 9f0fe5bb450d..10c4fc2c91f8 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -517,7 +517,29 @@ static int check_online_cpus(void) return -EINVAL; } -static atomic_t late_cpus; +static atomic_t late_cpus_in; +static atomic_t late_cpus_out; + +static int __wait_for_cpus(atomic_t *t, long long timeout) +{ + int all_cpus = num_online_cpus(); + + atomic_inc(t); + + while (atomic_read(t) < all_cpus) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + all_cpus - atomic_read(t)); + return 1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + return 0; +} /* * Returns: @@ -527,30 +549,16 @@ static atomic_t late_cpus; */ static int __reload_late(void *info) { - unsigned int timeout = NSEC_PER_SEC; - int all_cpus = num_online_cpus(); int cpu = smp_processor_id(); enum ucode_state err; int ret = 0; - atomic_dec(&late_cpus); - /* * Wait for all CPUs to arrive. A load will not be attempted unless all * CPUs show up. * */ - while (atomic_read(&late_cpus)) { - if (timeout < SPINUNIT) { - pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", - atomic_read(&late_cpus)); - return -1; - } - - ndelay(SPINUNIT); - timeout -= SPINUNIT; - - touch_nmi_watchdog(); - } + if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) + return -1; spin_lock(&update_lock); apply_microcode_local(&err); @@ -558,15 +566,22 @@ static int __reload_late(void *info) if (err > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - ret = -1; - } else if (err == UCODE_UPDATED) { + return -1; + /* siblings return UCODE_OK because their engine got updated already */ + } else if (err == UCODE_UPDATED || err == UCODE_OK) { ret = 1; + } else { + return ret; } - atomic_inc(&late_cpus); - - while (atomic_read(&late_cpus) != all_cpus) - cpu_relax(); + /* + * Increase the wait timeout to a safe value here since we're + * serializing the microcode update and that could take a while on a + * large number of CPUs. And that is fine as the *actual* timeout will + * be determined by the last CPU finished updating and thus cut short. + */ + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) + panic("Timeout during microcode update!\n"); return ret; } @@ -579,12 +594,11 @@ static int microcode_reload_late(void) { int ret; - atomic_set(&late_cpus, num_online_cpus()); + atomic_set(&late_cpus_in, 0); + atomic_set(&late_cpus_out, 0); ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); - if (ret < 0) - return ret; - else if (ret > 0) + if (ret > 0) microcode_check(); return ret; -- cgit v1.2.3-70-g09d2 From 5927145efd5de71976e62e2822511b13014d7e56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Mon, 19 Mar 2018 11:38:13 +0100 Subject: x86/cpu: Remove the CONFIG_X86_PPRO_FENCE=y quirk There were only a few Pentium Pro multiprocessors systems where this errata applied. They are more than 20 years old now, and we've slowly dropped places which put the workarounds in and discouraged anyone from enabling the workaround. Get rid of it for good. Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Joerg Roedel <joro@8bytes.org> Cc: Jon Mason <jdmason@kudzu.us> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Muli Ben-Yehuda <mulix@mulix.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-2-hch@lst.de Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/Kconfig.cpu | 13 ------------- arch/x86/entry/vdso/vdso32/vclock_gettime.c | 2 -- arch/x86/include/asm/barrier.h | 30 ----------------------------- arch/x86/include/asm/io.h | 15 --------------- arch/x86/kernel/pci-nommu.c | 19 ------------------ arch/x86/um/asm/barrier.h | 4 ---- 6 files changed, 83 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 65a9a4716e34..f0c5ef578153 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT default "4" if MELAN || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -config X86_PPRO_FENCE - bool "PentiumPro memory ordering errata workaround" - depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1 - ---help--- - Old PentiumPro multiprocessor systems had errata that could cause - memory operations to violate the x86 ordering standard in rare cases. - Enabling this option will attempt to work around some (but not all) - occurrences of this problem, at the cost of much heavier spinlock and - memory barrier operations. - - If unsure, say n here. Even distro kernels should think twice before - enabling this: there are few systems, and an unlikely bug. - config X86_F00F_BUG def_bool y depends on M586MMX || M586TSC || M586 || M486 diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 7780bbfb06ef..9242b28418d5 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -5,8 +5,6 @@ #undef CONFIG_OPTIMIZE_INLINING #endif -#undef CONFIG_X86_PPRO_FENCE - #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e1259f043ae9..042b5e892ed1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ "lfence", X86_FEATURE_LFENCE_RDTSC) -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else #define dma_rmb() barrier() -#endif #define dma_wmb() barrier() #ifdef CONFIG_X86_32 @@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#if defined(CONFIG_X86_PPRO_FENCE) - -/* - * For this option x86 doesn't have a strong TSO memory - * model and we should fall back to full barriers. - */ - -#define __smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -#define __smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - ___p1; \ -}) - -#else /* regular x86 TSO memory ordering */ - #define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ @@ -107,8 +79,6 @@ do { \ ___p1; \ }) -#endif - /* Atomic operations are already serializing on x86 */ #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 95e948627fd0..f6e5b9375d8c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void); */ #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. Accidentally out of order processors (PPro errata #51) - */ - -static inline void flush_write_buffers(void) -{ -#if defined(CONFIG_X86_PPRO_FENCE) - asm volatile("lock; addl $0,0(%%esp)": : :"memory"); -#endif -} - #endif /* __KERNEL__ */ extern void native_io_delay(void); diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 618285e475c6..ac7ea3a8242f 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) return NOMMU_MAPPING_ERROR; - flush_write_buffers(); return bus; } @@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return 0; s->dma_length = s->length; } - flush_write_buffers(); return nents; } -static void nommu_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - - -static void nommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == NOMMU_MAPPING_ERROR; @@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = { .free = dma_generic_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, - .sync_single_for_device = nommu_sync_single_for_device, - .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, .mapping_error = nommu_mapping_error, .dma_supported = x86_dma_supported, diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index b7d73400ea29..f31e5d903161 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -30,11 +30,7 @@ #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else /* CONFIG_X86_PPRO_FENCE */ #define dma_rmb() barrier() -#endif /* CONFIG_X86_PPRO_FENCE */ #define dma_wmb() barrier() #include <asm-generic/barrier.h> -- cgit v1.2.3-70-g09d2 From d8ba61ba58c88d5207c1ba2f7d9a2280e7d03be9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@kernel.org> Date: Thu, 23 Jul 2015 15:37:48 -0700 Subject: x86/entry/64: Don't use IST entry for #BP stack There's nothing IST-worthy about #BP/int3. We don't allow kprobes in the small handful of places in the kernel that run at CPL0 with an invalid stack, and 32-bit kernels have used normal interrupt gates for #BP forever. Furthermore, we don't allow kprobes in places that have usergs while in kernel mode, so "paranoid" is also unnecessary. Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: stable@vger.kernel.org --- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/idt.c | 2 -- arch/x86/kernel/traps.c | 15 ++++++++------- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d5c7f18f79ac..9b114675fbc0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1138,7 +1138,7 @@ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 56d99be3706a..50bee5fe1140 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -160,7 +160,6 @@ static const __initconst struct idt_data early_pf_idts[] = { */ static const __initconst struct idt_data dbg_idts[] = { INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_BP, int3), }; #endif @@ -183,7 +182,6 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), - SISTG(X86_TRAP_BP, int3, DEBUG_STACK), ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), #ifdef CONFIG_X86_MCE ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3d9b2308e7fa..03f3d7695dac 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -577,7 +577,6 @@ do_general_protection(struct pt_regs *regs, long error_code) } NOKPROBE_SYMBOL(do_general_protection); -/* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE @@ -592,6 +591,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; + /* + * Use ist_enter despite the fact that we don't use an IST stack. + * We can be called from a kprobe in non-CONTEXT_KERNEL kernel + * mode or even during context tracking state changes. + * + * This means that we can't schedule. That's okay. + */ ist_enter(regs); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -609,15 +615,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) goto exit; - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); cond_local_irq_disable(regs); - debug_stack_usage_dec(); + exit: ist_exit(regs); } -- cgit v1.2.3-70-g09d2