diff options
author | Ard Biesheuvel <ardb@kernel.org> | 2021-11-25 10:26:44 +0100 |
---|---|---|
committer | Ard Biesheuvel <ardb@kernel.org> | 2021-12-06 12:49:17 +0100 |
commit | 7b9896c352073156a325c3bb0dc4c46e06e2a468 (patch) | |
tree | a9e6e3be8a6a2b91a147f3e9ae82dfb7fbd860e5 | |
parent | 4e918ab13eaf40f19938659cb5a22c93172778a8 (diff) |
ARM: percpu: add SMP_ON_UP support
Permit the use of the TPIDRPRW system register for carrying the per-CPU
offset in generic SMP configurations that also target non-SMP capable
ARMv6 cores. This uses the SMP_ON_UP code patching framework to turn all
TPIDRPRW accesses into reads/writes of entry #0 in the __per_cpu_offset
array.
While at it, switch over some existing direct TPIDRPRW accesses in asm
code to invocations of a new helper that is patched in the same way when
necessary.
Note that CPU_V6+SMP without SMP_ON_UP results in a kernel that does not
boot on v6 CPUs without SMP extensions, so add this dependency to
Kconfig as well.
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
-rw-r--r-- | arch/arm/include/asm/assembler.h | 59 | ||||
-rw-r--r-- | arch/arm/include/asm/insn.h | 24 | ||||
-rw-r--r-- | arch/arm/include/asm/percpu.h | 25 | ||||
-rw-r--r-- | arch/arm/kernel/entry-armv.S | 16 | ||||
-rw-r--r-- | arch/arm/kernel/sleep.S | 4 | ||||
-rw-r--r-- | arch/arm/mm/Kconfig | 1 |
6 files changed, 107 insertions, 22 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 2095638b7140..f9b3dd0e9ef5 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -220,9 +220,7 @@ THUMB( fpreg .req r7 ) .macro reload_current, t1:req, t2:req #ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO - adr_l \t1, __entry_task @ get __entry_task base address - mrc p15, 0, \t2, c13, c0, 4 @ get per-CPU offset - ldr \t1, [\t1, \t2] @ load variable + ldr_this_cpu \t1, __entry_task, \t1, \t2 mcr p15, 0, \t1, c13, c0, 3 @ store in TPIDRURO #endif .endm @@ -312,6 +310,26 @@ THUMB( fpreg .req r7 ) #define ALT_UP_B(label) b label #endif + /* + * this_cpu_offset - load the per-CPU offset of this CPU into + * register 'rd' + */ + .macro this_cpu_offset, rd:req +#ifdef CONFIG_SMP +ALT_SMP(mrc p15, 0, \rd, c13, c0, 4) +#ifdef CONFIG_CPU_V6 +ALT_UP_B(.L1_\@) +.L0_\@: + .subsection 1 +.L1_\@: ldr_va \rd, __per_cpu_offset + b .L0_\@ + .previous +#endif +#else + mov \rd, #0 +#endif + .endm + /* * Instruction barrier */ @@ -649,6 +667,41 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .endm /* + * ldr_this_cpu_armv6 - Load a 32-bit word from the per-CPU variable 'sym', + * without using a temp register. Supported in ARM mode + * only. + */ + .macro ldr_this_cpu_armv6, rd:req, sym:req + this_cpu_offset \rd + .globl \sym + .reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym + .reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym + .reloc .L2_\@, R_ARM_LDR_PC_G2, \sym + add \rd, \rd, pc +.L0_\@: sub \rd, \rd, #4 +.L1_\@: sub \rd, \rd, #0 +.L2_\@: ldr \rd, [\rd, #4] + .endm + + /* + * ldr_this_cpu - Load a 32-bit word from the per-CPU variable 'sym' + * into register 'rd', which may be the stack pointer, + * using 't1' and 't2' as general temp registers. These + * are permitted to overlap with 'rd' if != sp + */ + .macro ldr_this_cpu, rd:req, sym:req, t1:req, t2:req +#if __LINUX_ARM_ARCH__ >= 7 || \ + (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \ + (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000) + this_cpu_offset \t1 + mov_l \t2, \sym + ldr \rd, [\t1, \t2] +#else + ldr_this_cpu_armv6 \rd, \sym +#endif + .endm + + /* * rev_l - byte-swap a 32-bit value * * @val: source/destination register diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h index 5475cbf9fb6b..a160ed3ea427 100644 --- a/arch/arm/include/asm/insn.h +++ b/arch/arm/include/asm/insn.h @@ -2,6 +2,30 @@ #ifndef __ASM_ARM_INSN_H #define __ASM_ARM_INSN_H +#include <linux/types.h> + +/* + * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the + * appropriate relocations. The combined sequence has a range of -/+ 256 MiB, + * which should be sufficient for the core kernel as well as modules loaded + * into the module region. (Not supported by LLD before release 14) + */ +#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \ + !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000) +#define LOAD_SYM_ARMV6(reg, sym) \ + " .globl " #sym " \n\t" \ + " .reloc 10f, R_ARM_ALU_PC_G0_NC, " #sym " \n\t" \ + " .reloc 11f, R_ARM_ALU_PC_G1_NC, " #sym " \n\t" \ + " .reloc 12f, R_ARM_LDR_PC_G2, " #sym " \n\t" \ + "10: sub " #reg ", pc, #8 \n\t" \ + "11: sub " #reg ", " #reg ", #4 \n\t" \ + "12: ldr " #reg ", [" #reg ", #0] \n\t" +#else +#define LOAD_SYM_ARMV6(reg, sym) \ + " ldr " #reg ", =" #sym " \n\t" \ + " ldr " #reg ", [" #reg "] \n\t" +#endif + static inline unsigned long arm_gen_nop(void) { diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h index e2fcb3cfd3de..a4a0d38d016a 100644 --- a/arch/arm/include/asm/percpu.h +++ b/arch/arm/include/asm/percpu.h @@ -5,15 +5,22 @@ #ifndef _ASM_ARM_PERCPU_H_ #define _ASM_ARM_PERCPU_H_ +#include <asm/insn.h> + register unsigned long current_stack_pointer asm ("sp"); /* * Same as asm-generic/percpu.h, except that we store the per cpu offset * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7 */ -#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6) +#ifdef CONFIG_SMP static inline void set_my_cpu_offset(unsigned long off) { + extern unsigned int smp_on_up; + + if (IS_ENABLED(CONFIG_CPU_V6) && !smp_on_up) + return; + /* Set TPIDRPRW */ asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory"); } @@ -27,8 +34,20 @@ static inline unsigned long __my_cpu_offset(void) * We want to allow caching the value, so avoid using volatile and * instead use a fake stack read to hazard against barrier(). */ - asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) - : "Q" (*(const unsigned long *)current_stack_pointer)); + asm("0: mrc p15, 0, %0, c13, c0, 4 \n\t" +#ifdef CONFIG_CPU_V6 + "1: \n\t" + " .subsection 1 \n\t" + "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) " \n\t" + " b 1b \n\t" + " .previous \n\t" + " .pushsection \".alt.smp.init\", \"a\" \n\t" + " .long 0b - . \n\t" + " b . + (2b - 0b) \n\t" + " .popsection \n\t" +#endif + : "=r" (off) + : "Q" (*(const unsigned long *)current_stack_pointer)); return off; } diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7f7ac963445c..43d917f0d9a9 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -35,15 +35,14 @@ .macro irq_handler, from_user:req mov r0, sp #ifdef CONFIG_IRQSTACKS - mov_l r2, irq_stack_ptr @ Take base address - mrc p15, 0, r3, c13, c0, 4 @ Get CPU offset #ifdef CONFIG_UNWINDER_ARM mov fpreg, sp @ Preserve original SP #else mov r8, fp @ Preserve original FP mov r9, sp @ Preserve original SP #endif - ldr sp, [r2, r3] @ Load SP from per-CPU var + ldr_this_cpu sp, irq_stack_ptr, r2, r3 + .if \from_user == 0 UNWIND( .setfp fpreg, sp ) @ @@ -876,16 +875,7 @@ __bad_stack: THUMB( bx pc ) THUMB( nop ) THUMB( .arm ) - mrc p15, 0, ip, c13, c0, 4 @ Get per-CPU offset - - .globl overflow_stack_ptr - .reloc 0f, R_ARM_ALU_PC_G0_NC, overflow_stack_ptr - .reloc 1f, R_ARM_ALU_PC_G1_NC, overflow_stack_ptr - .reloc 2f, R_ARM_LDR_PC_G2, overflow_stack_ptr - add ip, ip, pc -0: add ip, ip, #-4 -1: add ip, ip, #0 -2: ldr ip, [ip, #4] + ldr_this_cpu_armv6 ip, overflow_stack_ptr str sp, [ip, #-4]! @ Preserve original SP value mov sp, ip @ Switch to overflow stack diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 803b51e5cba0..f909baf17912 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -71,9 +71,7 @@ ENTRY(__cpu_suspend) @ Run the suspend code from the overflow stack so we don't have to rely @ on vmalloc-to-phys conversions anywhere in the arch suspend code. @ The original SP value captured in R5 will be restored on the way out. - mov_l r6, overflow_stack_ptr @ Base pointer - mrc p15, 0, r7, c13, c0, 4 @ Get per-CPU offset - ldr sp, [r6, r7] @ Address of this CPU's overflow stack + ldr_this_cpu sp, overflow_stack_ptr, r6, r7 #endif add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn sub sp, sp, r4 @ allocate CPU state on stack diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 58afba346729..a91ff22c6c2e 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -386,6 +386,7 @@ config CPU_V6 select CPU_PABRT_V6 select CPU_THUMB_CAPABLE select CPU_TLB_V6 if MMU + select SMP_ON_UP if SMP # ARMv6k config CPU_V6K |