From 39ca5fb4920a96eeab478be2cfa6a2369fef6b02 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Mon, 1 Jul 2019 19:33:54 +0200
Subject: x86/ldt: Initialize the context lock for init_mm

The mutex mm->context.lock is not initialized for init_mm. This wasn't a
problem because it remained unused. This changed, however, with commit
4fc19708b165c ("x86/alternatives: Initialize temporary mm for patching").

Initialize the mutex for init_mm.

Fixes: 4fc19708b165c ("x86/alternatives: Initialize temporary mm for patching")
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Reviewed-by: Nadav Amit
Cc: Peter Zijlstra
Cc: Andy Lutomirski
Link: https://lkml.kernel.org/r/20190701173354.2pe62hhliok2afea@linutronix.de
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/mmu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5ff3e8af2c20..e78c7db87801 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -59,6 +59,7 @@ typedef struct {
 #define INIT_MM_CONTEXT(mm) \
         .context = { \
                 .ctx_id = 1, \
+                .lock = __MUTEX_INITIALIZER(mm.context.lock), \
         }
 
 void leave_mm(int cpu);
--
cgit v1.2.3-70-g09d2

From 013c66edf207ddb78422b8b636f56c87939c9e34 Mon Sep 17 00:00:00 2001
From: Ross Zwisler
Date: Mon, 1 Jul 2019 09:52:08 -0600
Subject: Revert "x86/build: Move _etext to actual end of .text"

This reverts commit 392bef709659abea614abfe53cf228e7a59876a4.

Per the discussion here:

  https://lkml.kernel.org/r/201906201042.3BF5CD6@keescook

the above-referenced commit breaks kernel compilation with old GCC
toolchains as well as current versions of the Gold linker. Revert it to
fix the regression and to keep the ability to compile the kernel with
these tools.

Signed-off-by: Ross Zwisler
Signed-off-by: Thomas Gleixner
Reviewed-by: Guenter Roeck
Cc:
Cc: "H. Peter Anvin"
Cc: Borislav Petkov
Cc: Kees Cook
Cc: Johannes Hirte
Cc: Klaus Kusche
Cc: samitolvanen@google.com
Cc: Guenter Roeck
Link: https://lkml.kernel.org/r/20190701155208.211815-1-zwisler@google.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/vmlinux.lds.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0850b5149345..4d1517022a14 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -141,10 +141,10 @@ SECTIONS
                 *(.text.__x86.indirect_thunk)
                 __indirect_thunk_end = .;
 #endif
-        } :text = 0x9090
 
-        /* End of text section */
-        _etext = .;
+        /* End of text section */
+        _etext = .;
+        } :text = 0x9090
 
         NOTES :text :note
 
--
cgit v1.2.3-70-g09d2

From 1cbec37b3f9cff074a67bef4fc34b30a09958a0a Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Tue, 9 Jul 2019 08:34:02 +0200
Subject: x86/entry/32: Fix ENDPROC of common_spurious

common_spurious is currently ENDed erroneously: common_interrupt is used
in its ENDPROC. Fix this mistake.

Found by my asm macros rewrite patchset.

Fixes: f8a8fe61fec8 ("x86/irq: Seperate unused system vectors from spurious entry again")
Signed-off-by: Jiri Slaby
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190709063402.19847-1-jslaby@suse.cz
---
 arch/x86/entry/entry_32.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 1285e5abf669..90b473297299 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1189,7 +1189,7 @@ common_spurious:
         movl %esp, %eax
         call smp_spurious_interrupt
         jmp ret_from_intr
-ENDPROC(common_interrupt)
+ENDPROC(common_spurious)
 #endif
 
 /*
--
cgit v1.2.3-70-g09d2

From ecc606103837b98a2b665e8f14e533a6c72bbdc0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 8 Jul 2019 15:55:30 -0500
Subject: x86/alternatives: Fix int3_emulate_call() selftest stack corruption

KASAN shows the following splat during boot:

  BUG: KASAN: unknown-crash in unwind_next_frame+0x3f6/0x490
  Read of size 8 at addr ffffffff84007db0 by task swapper/0

  CPU: 0 PID: 0 Comm: swapper Tainted: G        T 5.2.0-rc6-00013-g7457c0d #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
  Call Trace:
   dump_stack+0x19/0x1b
   print_address_description+0x1b0/0x2b2
   __kasan_report+0x10f/0x171
   kasan_report+0x12/0x1c
   __asan_load8+0x54/0x81
   unwind_next_frame+0x3f6/0x490
   unwind_next_frame+0x1b/0x23
   arch_stack_walk+0x68/0xa5
   stack_trace_save+0x7b/0xa0
   save_trace+0x3c/0x93
   mark_lock+0x1ef/0x9b1
   lock_acquire+0x122/0x221
   __mutex_lock+0xb6/0x731
   mutex_lock_nested+0x16/0x18
   _vm_unmap_aliases+0x141/0x183
   vm_unmap_aliases+0x14/0x16
   change_page_attr_set_clr+0x15e/0x2f2
   set_memory_4k+0x2a/0x2c
   check_bugs+0x11fd/0x1298
   start_kernel+0x793/0x7eb
   x86_64_start_reservations+0x55/0x76
   x86_64_start_kernel+0x87/0xaa
   secondary_startup_64+0xa4/0xb0

  Memory state around the buggy address:
   ffffffff84007c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1
   ffffffff84007d00: f1 00 00 00 00 00 00 00 00 00 f2 f2 f2 f3 f3 f3
  >ffffffff84007d80: f3 79 be 52 49 79 be 00 00 00 00 00 00 00 00 f1

It turns out that int3_selftest() is corrupting the stack. The problem is
that the KASAN-ified version of int3_magic() is much less trivial than the
C code appears. It clobbers several unexpected registers. So when the
selftest's INT3 is converted to an emulated call to int3_magic(), the
registers are clobbered and Bad Things happen when the function returns.

Fix this by converting int3_magic() to the trivial ASM function it should
be, avoiding all calling convention issues.

Also add ASM_CALL_CONSTRAINT to the INT3 ASM, since it contains a 'CALL'.

[peterz: cribbed changelog from josh]

Fixes: 7457c0da024b ("x86/alternatives: Add int3_emulate_call() selftest")
Reported-by: kernel test robot
Debugged-by: Josh Poimboeuf
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Thomas Gleixner
Reviewed-by: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Andy Lutomirski
Link: https://lkml.kernel.org/r/20190709125744.GB3402@hirez.programming.kicks-ass.net
---
 arch/x86/kernel/alternative.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 99ef8b6f9a1a..ccd32013c47a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -625,10 +625,23 @@ extern struct paravirt_patch_site __start_parainstructions[],
  *
  * See entry_{32,64}.S for more details.
  */
-static void __init int3_magic(unsigned int *ptr)
-{
-        *ptr = 1;
-}
+
+/*
+ * We define the int3_magic() function in assembly to control the calling
+ * convention such that we can 'call' it from assembly.
+ */
+
+extern void int3_magic(unsigned int *ptr); /* defined in asm */
+
+asm (
+" .pushsection .init.text, \"ax\", @progbits\n"
+" .type int3_magic, @function\n"
+"int3_magic:\n"
+" movl $1, (%" _ASM_ARG1 ")\n"
+" ret\n"
+" .size int3_magic, .-int3_magic\n"
+" .popsection\n"
+);
 
 extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
 
@@ -676,7 +689,9 @@ static void __init int3_selftest(void)
               "int3_selftest_ip:\n\t"
               __ASM_SEL(.long, .quad) " 1b\n\t"
               ".popsection\n\t"
-              : : __ASM_SEL_RAW(a, D) (&val) : "memory");
+              : ASM_CALL_CONSTRAINT
+              : __ASM_SEL_RAW(a, D) (&val)
+              : "memory");
 
         BUG_ON(val != 1);
 
--
cgit v1.2.3-70-g09d2

From 26515699863d68058e290e18e83f444925920be5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 10 Jul 2019 15:04:55 +0200
Subject: x86/pgtable/32: Fix LOWMEM_PAGES constant

clang points out that the computation of LOWMEM_PAGES causes a signed
integer overflow on 32-bit x86:

  arch/x86/kernel/head32.c:83:20: error: signed shift result (0x100000000) requires 34 bits to represent, but 'int' only has 32 bits [-Werror,-Wshift-overflow]
                  (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT);
                                   ^~~~~~~~~~~~
  arch/x86/include/asm/pgtable_32.h:109:27: note: expanded from macro 'LOWMEM_PAGES'
  #define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
                          ~^ ~~
  arch/x86/include/asm/pgtable_32.h:98:34: note: expanded from macro 'PAGE_TABLE_SIZE'
  #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)

Use the _ULL() macro to make it a 64-bit constant.

Fixes: 1e620f9b23e5 ("x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C")
Signed-off-by: Arnd Bergmann
Signed-off-by: Thomas Gleixner
Link: https://lkml.kernel.org/r/20190710130522.1802800-1-arnd@arndb.de
---
 arch/x86/include/asm/pgtable_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 4fe9e7fc74d3..c78da8eda8f2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -106,6 +106,6 @@ do { \
  * with only a host target support using a 32-bit type for internal
  * representation.
  */
-#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
+#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT))
 
 #endif /* _ASM_X86_PGTABLE_32_H */
--
cgit v1.2.3-70-g09d2

From 7652ac92018536eb807b6c2130100c85f1ba7e3b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 10 Jul 2019 21:42:46 +0200
Subject: x86/asm: Move native_write_cr0/4() out of line

The pinning of sensitive CR0 and CR4 bits caused a boot crash when
loading the kvm_intel module on a kernel compiled with CONFIG_PARAVIRT=n.

The reason is that the static key which controls the pinning is marked RO
after init. The kvm_intel module contains a CR4 write which requires
updating the static key entry list. That obviously does not work when the
key is in a RO section.

With CONFIG_PARAVIRT enabled this does not happen because the CR4 write
uses the paravirt indirection and the actual write function is built in.

As the key is intended to be immutable after init, move
native_write_cr0/4() out of line.

While at it, consolidate the update of the cr4 shadow variable and store
the value right away when the pinning is initialized on a booting CPU.
No point in reading it back 20 instructions later.

This allows confining the static key and the pinning variable to
cpu/common and marking them static.

Fixes: 8dbec27a242c ("x86/asm: Pin sensitive CR0 bits")
Fixes: 873d50d58f67 ("x86/asm: Pin sensitive CR4 bits")
Reported-by: Linus Torvalds
Reported-by: Xi Ruoyao
Signed-off-by: Thomas Gleixner
Tested-by: Xi Ruoyao
Acked-by: Kees Cook
Acked-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1907102140340.1758@nanos.tec.linutronix.de
---
 arch/x86/include/asm/processor.h     |  1 +
 arch/x86/include/asm/special_insns.h | 41 +-------
 arch/x86/kernel/cpu/common.c         | 72 ++++++++++++++++++++++++++++--------
 arch/x86/kernel/smpboot.c            | 14 +------
 arch/x86/xen/smp_pv.c                |  1 +
 5 files changed, 61 insertions(+), 68 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3eab6ece52b4..6e0a3b43d027 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -741,6 +741,7 @@ extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
+extern void cr4_init(void);
 
 static inline unsigned long get_debugctlmsr(void)
 {
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index b2e84d113f2a..219be88a59d2 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -18,9 +18,7 @@
  */
 extern unsigned long __force_order;
 
-/* Starts false and gets enabled once CPU feature detection is done. */
-DECLARE_STATIC_KEY_FALSE(cr_pinning);
-extern unsigned long cr4_pinned_bits;
+void native_write_cr0(unsigned long val);
 
 static inline unsigned long native_read_cr0(void)
 {
@@ -29,24 +27,6 @@ static inline unsigned long native_read_cr0(void)
         return val;
 }
 
-static inline void native_write_cr0(unsigned long val)
-{
-        unsigned long bits_missing = 0;
-
-set_register:
-        asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
-
-        if (static_branch_likely(&cr_pinning)) {
-                if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
-                        bits_missing = X86_CR0_WP;
-                        val |= bits_missing;
-                        goto set_register;
-                }
-                /* Warn after we've set the missing bits. */
-                WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
-        }
-}
-
 static inline unsigned long native_read_cr2(void)
 {
         unsigned long val;
@@ -91,24 +71,7 @@ static inline unsigned long native_read_cr4(void)
         return val;
 }
 
-static inline void native_write_cr4(unsigned long val)
-{
-        unsigned long bits_missing = 0;
-
-set_register:
-        asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
-
-        if (static_branch_likely(&cr_pinning)) {
-                if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
-                        bits_missing = ~val & cr4_pinned_bits;
-                        val |= bits_missing;
-                        goto set_register;
-                }
-                /* Warn after we've set the missing bits. */
-                WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
-                          bits_missing);
-        }
-}
+void native_write_cr4(unsigned long val);
 
 #ifdef CONFIG_X86_64
 static inline unsigned long native_read_cr8(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 309b6b9b49d4..11472178e17f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,10 +366,62 @@ out:
         cr4_clear_bits(X86_CR4_UMIP);
 }
 
-DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
-EXPORT_SYMBOL(cr_pinning);
-unsigned long cr4_pinned_bits __ro_after_init;
-EXPORT_SYMBOL(cr4_pinned_bits);
+static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
+static unsigned long cr4_pinned_bits __ro_after_init;
+
+void native_write_cr0(unsigned long val)
+{
+        unsigned long bits_missing = 0;
+
+set_register:
+        asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
+
+        if (static_branch_likely(&cr_pinning)) {
+                if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
+                        bits_missing = X86_CR0_WP;
+                        val |= bits_missing;
+                        goto set_register;
+                }
+                /* Warn after we've set the missing bits. */
+                WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
+        }
+}
+EXPORT_SYMBOL(native_write_cr0);
+
+void native_write_cr4(unsigned long val)
+{
+        unsigned long bits_missing = 0;
+
+set_register:
+        asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
+
+        if (static_branch_likely(&cr_pinning)) {
+                if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
+                        bits_missing = ~val & cr4_pinned_bits;
+                        val |= bits_missing;
+                        goto set_register;
+                }
+                /* Warn after we've set the missing bits. */
+                WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
+                          bits_missing);
+        }
+}
+EXPORT_SYMBOL(native_write_cr4);
+
+void cr4_init(void)
+{
+        unsigned long cr4 = __read_cr4();
+
+        if (boot_cpu_has(X86_FEATURE_PCID))
+                cr4 |= X86_CR4_PCIDE;
+        if (static_branch_likely(&cr_pinning))
+                cr4 |= cr4_pinned_bits;
+
+        __write_cr4(cr4);
+
+        /* Initialize cr4 shadow for this CPU. */
+        this_cpu_write(cpu_tlbstate.cr4, cr4);
+}
 
 /*
  * Once CPU feature detection is finished (and boot params have been
@@ -1723,12 +1775,6 @@ void cpu_init(void)
 
         wait_for_master_cpu(cpu);
 
-        /*
-         * Initialize the CR4 shadow before doing anything that could
-         * try to read it.
-         */
-        cr4_init_shadow();
-
         if (cpu)
                 load_ucode_ap();
 
@@ -1823,12 +1869,6 @@ void cpu_init(void)
 
         wait_for_master_cpu(cpu);
 
-        /*
-         * Initialize the CR4 shadow before doing anything that could
-         * try to read it.
-         */
-        cr4_init_shadow();
-
         show_ucode_info_early();
 
         pr_info("Initializing CPU#%d\n", cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f78801114ee1..259d1d2be076 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -210,28 +210,16 @@ static int enable_start_cpu0;
  */
 static void notrace start_secondary(void *unused)
 {
-        unsigned long cr4 = __read_cr4();
-
         /*
          * Don't put *anything* except direct CPU state initialization
          * before cpu_init(), SMP booting is too fragile that we want to
          * limit the things done here to the most necessary things.
          */
-        if (boot_cpu_has(X86_FEATURE_PCID))
-                cr4 |= X86_CR4_PCIDE;
-        if (static_branch_likely(&cr_pinning))
-                cr4 |= cr4_pinned_bits;
-
-        __write_cr4(cr4);
+        cr4_init();
 
 #ifdef CONFIG_X86_32
         /* switch away from the initial page table */
         load_cr3(swapper_pg_dir);
-        /*
-         * Initialize the CR4 shadow before doing anything that could
-         * try to read it.
-         */
-        cr4_init_shadow();
         __flush_tlb_all();
 #endif
         load_current_idt();
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 77d81c1a63e9..802ee5bba66c 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -58,6 +58,7 @@ static void cpu_bringup(void)
 {
         int cpu;
 
+        cr4_init();
         cpu_init();
         touch_softlockup_watchdog();
         preempt_disable();
--
cgit v1.2.3-70-g09d2

From cbf5b73d162b22e044fe0b7d51dcaa33be065253 Mon Sep 17 00:00:00 2001
From: Eiichi Tsukata
Date: Thu, 11 Jul 2019 11:35:01 +0900
Subject: x86/stacktrace: Prevent infinite loop in arch_stack_walk_user()

arch_stack_walk_user() checks `if (fp == frame.next_fp)` to prevent an
infinite loop caused by a self-referencing frame pointer, but that is not
enough for longer circular reference chains. Once a lack of return
address is found, there is no point in continuing the loop, so break out.

Fixes: 02b67518e2b1 ("tracing: add support for userspace stacktraces in tracing/iter_ctrl")
Signed-off-by: Eiichi Tsukata
Signed-off-by: Thomas Gleixner
Acked-by: Linus Torvalds
Link: https://lkml.kernel.org/r/20190711023501.963-1-devel@etsukata.com
---
 arch/x86/kernel/stacktrace.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 2abf27d7df6b..4f36d3241faf 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -129,11 +129,9 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
                         break;
                 if ((unsigned long)fp < regs->sp)
                         break;
-                if (frame.ret_addr) {
-                        if (!consume_entry(cookie, frame.ret_addr, false))
-                                return;
-                }
-                if (fp == frame.next_fp)
+                if (!frame.ret_addr)
+                        break;
+                if (!consume_entry(cookie, frame.ret_addr, false))
                         break;
                 fp = frame.next_fp;
         }
--
cgit v1.2.3-70-g09d2
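
As an aside on the last fix above: the rewritten arch_stack_walk_user() loop stops as soon as a frame has no return address, and otherwise relies on the consumer refusing further entries (its trace storage is bounded) to terminate even circular frame-pointer chains. The following standalone userspace sketch mirrors only that termination logic; the struct layout, names and direct pointer dereferences are illustrative assumptions, not the kernel's implementation, which copies each frame from user memory with a fault-safe helper and additionally bounds the walk by regs->sp.

/*
 * Standalone demo (NOT kernel code) of the loop-termination rules used by
 * the patched arch_stack_walk_user(): stop on a missing return address,
 * stop when the consumer refuses further entries, otherwise follow the
 * saved frame pointers even if they form a cycle.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_frame {
        struct demo_frame *next_fp;     /* saved caller frame pointer */
        unsigned long ret_addr;         /* saved return address */
};

struct demo_trace {
        unsigned long *buf;
        unsigned int len, max;
};

/* Returns false once the buffer is full; this bound is what cuts off
 * circular frame-pointer chains. */
static bool demo_consume(void *cookie, unsigned long addr)
{
        struct demo_trace *t = cookie;

        if (t->len >= t->max)
                return false;
        t->buf[t->len++] = addr;
        return true;
}

static void demo_walk(const struct demo_frame *fp,
                      bool (*consume)(void *cookie, unsigned long addr),
                      void *cookie)
{
        while (fp) {
                if (!fp->ret_addr)
                        break;          /* no return address: stop, as in the fix */
                if (!consume(cookie, fp->ret_addr))
                        break;          /* consumer refused further entries */
                fp = fp->next_fp;       /* may point backwards or form a cycle */
        }
}

int main(void)
{
        unsigned long entries[4];
        struct demo_trace trace = { entries, 0, 4 };
        struct demo_frame f1, f2;

        /* Deliberately circular two-frame chain: f1 -> f2 -> f1 -> ... */
        f1 = (struct demo_frame){ .next_fp = &f2, .ret_addr = 0x1001 };
        f2 = (struct demo_frame){ .next_fp = &f1, .ret_addr = 0x1002 };

        demo_walk(&f1, demo_consume, &trace);
        printf("captured %u entries\n", trace.len);     /* prints 4; the walk terminates */
        return 0;
}

Compiling this with any C99 compiler and running it prints "captured 4 entries": the deliberately circular chain is cut off by the bounded consumer instead of looping forever, which is the behaviour the patch restores for malformed user stacks.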