diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-30 12:22:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-30 12:22:28 -0700 |
commit | c70a4be130de333ea079c59da41cc959712bb01c (patch) | |
tree | efa1b9a7aac979dcbf53ce89e2f8ffc61f6d2952 | |
parent | 437d1a5b66ca60f209e25f469b395741cc10b731 (diff) | |
parent | 5256426247837feb8703625bda7fcfc824af04cf (diff) |
Merge tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Enable KFENCE for 32-bit.
- Implement EBPF for 32-bit.
- Convert 32-bit to do interrupt entry/exit in C.
- Convert 64-bit BookE to do interrupt entry/exit in C.
- Changes to our signal handling code to use user_access_begin/end()
more extensively.
- Add support for time namespaces (CONFIG_TIME_NS).
- A series of fixes that allow us to reenable STRICT_KERNEL_RWX.
- Other smaller features, fixes & cleanups.
Thanks to Alexey Kardashevskiy, Andreas Schwab, Andrew Donnellan, Aneesh
Kumar K.V, Athira Rajeev, Bhaskar Chowdhury, Bixuan Cui, Cédric Le
Goater, Chen Huang, Chris Packham, Christophe Leroy, Christopher M.
Riedl, Colin Ian King, Dan Carpenter, Daniel Axtens, Daniel Henrique
Barboza, David Gibson, Davidlohr Bueso, Denis Efremov, dingsenjie,
Dmitry Safonov, Dominic DeMarco, Fabiano Rosas, Ganesh Goudar, Geert
Uytterhoeven, Geetika Moolchandani, Greg Kurz, Guenter Roeck, Haren
Myneni, He Ying, Jiapeng Chong, Jordan Niethe, Laurent Dufour, Lee
Jones, Leonardo Bras, Li Huafei, Madhavan Srinivasan, Mahesh Salgaonkar,
Masahiro Yamada, Nathan Chancellor, Nathan Lynch, Nicholas Piggin,
Oliver O'Halloran, Paul Menzel, Pu Lehui, Randy Dunlap, Ravi Bangoria,
Rosen Penev, Russell Currey, Santosh Sivaraj, Sebastian Andrzej Siewior,
Segher Boessenkool, Shivaprasad G Bhat, Srikar Dronamraju, Stephen
Rothwell, Thadeu Lima de Souza Cascardo, Thomas Gleixner, Tony Ambardar,
Tyrel Datwyler, Vaibhav Jain, Vincenzo Frascino, Xiongwei Song, Yang Li,
Yu Kuai, and Zhang Yunkai.
* tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (302 commits)
powerpc/signal32: Fix erroneous SIGSEGV on RT signal return
powerpc: Avoid clang uninitialized warning in __get_user_size_allowed
powerpc/papr_scm: Mark nvdimm as unarmed if needed during probe
powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n
powerpc/kasan: Fix shadow start address with modules
powerpc/kernel/iommu: Use largepool as a last resort when !largealloc
powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs
powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants"
powerpc/iommu: Annotate nested lock for lockdep
powerpc/iommu: Do not immediately panic when failed IOMMU table allocation
powerpc/iommu: Allocate it_map by vmalloc
selftests/powerpc: remove unneeded semicolon
powerpc/64s: remove unneeded semicolon
powerpc/eeh: remove unneeded semicolon
powerpc/selftests: Add selftest to test concurrent perf/ptrace events
powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR
powerpc/selftests/perf-hwbreak: Coalesce event creation code
powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR
powerpc/configs: Add IBMVNIC to some 64-bit configs
selftests/powerpc: Add uaccess flush test
...
234 files changed, 6868 insertions, 5596 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index c941b214e0b7..4150f74c521a 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -64,6 +64,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - arm64 - arm32 - ppc64 + - ppc32 - sparc64 - mips64 - s390x @@ -73,7 +74,6 @@ two flavors of JITs, the newer eBPF JIT currently supported on: And the older cBPF JIT supported on the following archs: - mips - - ppc - sparc eBPF JITs are a superset of cBPF JITs, meaning the kernel will diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt index 7aff505af706..fa83403b4aec 100644 --- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -21,7 +21,7 @@ | nios2: | TODO | | openrisc: | TODO | | parisc: | TODO | - | powerpc: | TODO | + | powerpc: | ok | | riscv: | ok | | s390: | ok | | sh: | TODO | diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst index 3d553e8a2937..fce8bc793660 100644 --- a/Documentation/powerpc/papr_hcalls.rst +++ b/Documentation/powerpc/papr_hcalls.rst @@ -275,6 +275,20 @@ Health Bitmap Flags: Given a DRC Index collect the performance statistics for NVDIMM and copy them to the resultBuffer. +**H_SCM_FLUSH** + +| Input: *drcIndex, continue-token* +| Out: *continue-token* +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY* + +Given a DRC Index Flush the data to backend NVDIMM device. + +The hcall returns H_BUSY when the flush takes longer time and the hcall needs +to be issued multiple times in order to be completely serviced. 
The +*continue-token* from the output to be passed in the argument list of +subsequent hcalls to the hypervisor until the hcall is completely serviced +at which point H_SUCCESS or other error is returned by the hypervisor. + References ========== .. [1] "Power Architecture Platform Reference" diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst index 90c50ed839f3..bdb50fed903e 100644 --- a/Documentation/powerpc/vas-api.rst +++ b/Documentation/powerpc/vas-api.rst @@ -254,7 +254,7 @@ using this window. the signal will be issued to the thread group leader signals. NX-GZIP User's Manual: -https://github.com/libnxz/power-gzip/blob/master/power_nx_gzip_um.pdf +https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf Simple example ============== @@ -301,5 +301,5 @@ Simple example close(fd) or window can be closed upon process exit } - Refer https://github.com/abalib/power-gzip for tests or more + Refer https://github.com/libnxz/power-gzip for tests or more use cases. 
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 7508b0ac1d21..ecb6fd4c3c64 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -155,7 +155,8 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { const struct vdso_data *ret; diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 4b4c0dac0e14..4f7a629df81f 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -96,7 +96,7 @@ const struct vdso_data *__arch_get_vdso_data(void) #ifdef CONFIG_TIME_NS static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(void) +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3b34c44832e0..c52b0a42062a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -119,6 +119,7 @@ config PPC # select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE @@ -135,7 +136,7 @@ config PPC select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_COPY_MC if PPC64 @@ -145,6 +146,7 @@ config PPC select 
ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 select ARCH_USE_BUILTIN_BSWAP @@ -171,6 +173,7 @@ config PPC select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_EARLY_IOREMAP + select GENERIC_GETTIMEOFDAY select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI @@ -178,13 +181,15 @@ config PPC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL - select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB + select HAVE_ARCH_KFENCE if PPC32 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_NVRAM_OPS @@ -192,7 +197,6 @@ config PPC select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS select HAVE_C_RECORDMCOUNT - select HAVE_CBPF_JIT if !PPC64 select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if PPC64 @@ -200,7 +204,7 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_EBPF_JIT if PPC64 + select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD @@ -224,8 +228,8 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select 
HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) - select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) - select HAVE_OPTPROBES if PPC64 + select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP + select HAVE_OPTPROBES select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH @@ -234,7 +238,7 @@ config PPC select MMU_GATHER_RCU_TABLE_FREE select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN + select HAVE_RELIABLE_STACKTRACE select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING @@ -786,7 +790,7 @@ config THREAD_SHIFT config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS - depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC + depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. 
When @@ -798,13 +802,13 @@ config DATA_SHIFT_BOOL config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 - range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 - range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32 + default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx - default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA - default 19 if DEBUG_PAGEALLOC && PPC_8xx + default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA + default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. @@ -1217,7 +1221,7 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx - default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX + default "0xb0000000" if PPC_BOOK3S_32 default "0xc0000000" endmenu diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index ae084357994e..6342f9da4545 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -353,6 +353,7 @@ config PPC_EARLY_DEBUG_CPM_ADDR config FAIL_IOMMU bool "Fault-injection capability for IOMMU" depends on FAULT_INJECTION + depends on PCI || IBMVIO help Provide fault-injection capability for IOMMU. Each device can be selectively enabled via the fail_iommu property. 
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f8544cf724a..3212d076ac6a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -181,12 +181,6 @@ CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL CC_FLAGS_FTRACE += -mprofile-kernel endif -# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828 -ifndef CONFIG_CC_IS_CLANG -CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog) -endif endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) @@ -444,12 +438,15 @@ endif endif ifdef CONFIG_SMP +ifdef CONFIG_PPC32 prepare: task_cpu_prepare PHONY += task_cpu_prepare task_cpu_prepare: prepare0 $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h)) -endif + +endif # CONFIG_PPC32 +endif # CONFIG_SMP PHONY += checkbin # Check toolchain versions: diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 4f05a6652478..701811c91a6f 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -50,6 +50,7 @@ CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y +CONFIG_FA_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y @@ -177,6 +178,7 @@ CONFIG_CHELSIO_T1=m CONFIG_BE2NET=m CONFIG_IBMVETH=m CONFIG_EHEA=m +CONFIG_IBMVNIC=m CONFIG_E100=y CONFIG_E1000=y CONFIG_E1000E=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 777221775c83..50168dde4ea5 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -41,6 +41,7 @@ CONFIG_DTL=y CONFIG_SCANLOG=m CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y +CONFIG_PAPR_SCM=m CONFIG_PPC_SVM=y # CONFIG_PPC_PMAC is not set CONFIG_RTAS_FLASH=m @@ -159,6 +160,7 @@ CONFIG_BE2NET=m 
CONFIG_S2IO=m CONFIG_IBMVETH=y CONFIG_EHEA=y +CONFIG_IBMVNIC=y CONFIG_E100=y CONFIG_E1000=y CONFIG_E1000E=y diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index e1f9b4ea1c53..bcf95ce0964f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table_32.h generated-y += syscall_table_64.h -generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += export.h generic-y += kvm_types.h diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 939f3c94c8f3..1c7b75834e04 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -77,8 +77,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); long sys_switch_endian(void); -notrace unsigned int __check_irq_replay(void); -void notrace restore_interrupts(void); /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index aecfde829d5d..7ae29cfb06c0 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -80,22 +80,6 @@ do { \ ___p1; \ }) -#ifdef CONFIG_PPC64 -#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ - typeof(ptr) __PTR = (ptr); \ - __unqual_scalar_typeof(*ptr) VAL; \ - VAL = READ_ONCE(*__PTR); \ - if (unlikely(!(cond_expr))) { \ - spin_begin(); \ - do { \ - VAL = READ_ONCE(*__PTR); \ - } while (!(cond_expr)); \ - spin_end(); \ - } \ - (typeof(*ptr))VAL; \ -}) -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #define NOSPEC_BARRIER_SLOT nop #elif defined(CONFIG_PPC_FSL_BOOK3E) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 
73bc5d2c431d..1670dfe9d4f1 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -5,86 +5,7 @@ #include <asm/bug.h> #include <asm/book3s/32/mmu-hash.h> -#ifdef __ASSEMBLY__ - -.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - bdnz 101b - isync -.endm - -.macro kuep_lock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - oris \gpr1, \gpr1, SR_NX@h /* set Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - -.macro kuep_unlock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - -#ifdef CONFIG_PPC_KUAP - -.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - cmplw \gpr2, \gpr3 - blt- 101b - isync -.endm - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lwz \gpr2, KUAP(\thread) - rlwinm. \gpr3, \gpr2, 28, 0xf0000000 - stw \gpr2, STACK_REGS_KUAP(\sp) - beq+ 102f - li \gpr1, 0 - stw \gpr1, KUAP(\thread) - mfsrin \gpr1, \gpr2 - oris \gpr1, \gpr1, SR_KS@h /* set Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 - lwz \gpr2, STACK_REGS_KUAP(\sp) - rlwinm. 
\gpr3, \gpr2, 28, 0xf0000000 - stw \gpr2, THREAD + KUAP(\current) - beq+ 102f - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr, THREAD + KUAP(\current) -999: twnei \gpr, 0 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#endif /* CONFIG_PPC_KUAP */ - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_KUAP @@ -103,6 +24,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsr() */ } +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + unsigned long kuap = current->thread.kuap; + u32 addr = kuap & 0xf0000000; + u32 end = kuap << 28; + + regs->kuap = kuap; + if (unlikely(!kuap)) + return; + + current->thread.kuap = 0; + kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */ +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + u32 addr = regs->kuap & 0xf0000000; + u32 end = regs->kuap << 28; + + current->thread.kuap = regs->kuap; + + if (unlikely(regs->kuap == kuap)) + return; + + kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ +} + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + unsigned long kuap = current->thread.kuap; + + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); + + return kuap; +} + +static inline void kuap_assert_locked(void) +{ + kuap_get_and_assert_locked(); +} + static __always_inline void allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 415ae29fa73a..83c65845a1a9 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ 
-194,10 +194,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define VMALLOC_END ioremap_bot #endif -#ifdef CONFIG_STRICT_KERNEL_RWX #define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) #define MODULES_VADDR (MODULES_END - SZ_256M) -#endif #ifndef __ASSEMBLY__ #include <linux/sched.h> diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index d941c06d4f2e..ba1743c52b56 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -79,4 +79,4 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) flush_tlb_mm(mm); } -#endif /* _ASM_POWERPC_TLBFLUSH_H */ +#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 8bd905050896..9700da3a4093 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, */ } -static inline unsigned long kuap_get_and_check_amr(void) +static inline unsigned long kuap_get_and_assert_locked(void) { if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { unsigned long amr = mfspr(SPRN_AMR); @@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void) return 0; } -#else /* CONFIG_PPC_PKEY */ - -static inline void kuap_user_restore(struct pt_regs *regs) -{ -} - -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0; -} - -#endif /* CONFIG_PPC_PKEY */ - - -#ifdef CONFIG_PPC_KUAP - -static inline void kuap_check_amr(void) +static inline void kuap_assert_locked(void) { if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index f911bdb68d8b..3004f3323144 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -18,7 +18,6 @@ * complete pgtable.h but only a portion of it. */ #include <asm/book3s/64/pgtable.h> -#include <asm/bug.h> #include <asm/task_size_64.h> #include <asm/cpu_has_feature.h> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 058601efbc8a..a666d561b44d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> #include <linux/bug.h> +#include <linux/sizes.h> #endif /* @@ -116,6 +117,7 @@ */ #define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ) +#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC) #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_RW | _PAGE_EXEC) /* @@ -323,7 +325,8 @@ extern unsigned long pci_io_base; #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_IO_END) +#define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE) +#define FIXADDR_SIZE SZ_32M /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index c7813dc628fc..59cab558e2f0 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -222,8 +222,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr, * from ptesync, it should probably go into update_mmu_cache, rather * than set_pte_at (which is used to set ptes unrelated to faults). * - * Spurious faults to vmalloc region are not tolerated, so there is - * a ptesync in flush_cache_vmap. 
+ * Spurious faults from the kernel memory are not tolerated, so there + * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows + * the pte update sequence from ISA Book III 6.10 Translation Table + * Update Synchronization Requirements. */ } diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index d1635ffbb179..0b2162890d8b 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -111,11 +111,8 @@ #ifndef __ASSEMBLY__ struct pt_regs; -long do_page_fault(struct pt_regs *); -long hash__do_page_fault(struct pt_regs *); +void hash__do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); -void __bad_page_fault(struct pt_regs *regs, int sig); -void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index f63495109f63..7564dd4fd12b 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,7 +30,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) #endif /* CONFIG_PPC_BOOK3S_64 */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. 
+ */ +static inline void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} void flush_icache_range(unsigned long start, unsigned long stop); #define flush_icache_range flush_icache_range @@ -40,7 +52,6 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, #define flush_icache_user_page flush_icache_user_page void flush_dcache_icache_page(struct page *page); -void __flush_dcache_icache(void *page); /** * flush_dcache_range(): Write any modified data cache blocks out to memory and diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index 2211b934ecb4..bda45788cfcc 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -594,7 +594,7 @@ typedef struct fcc_enet { uint fen_p256c; /* Total packets 256 < bytes <= 511 */ uint fen_p512c; /* Total packets 512 < bytes <= 1023 */ uint fen_p1024c; /* Total packets 1024 < bytes <= 1518 */ - uint fen_cambuf; /* Internal CAM buffer poiner */ + uint fen_cambuf; /* Internal CAM buffer pointer */ ushort fen_rfthr; /* Received frames threshold */ ushort fen_rfcnt; /* Received frames count */ } fcc_enet_t; diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 8d03c16a3663..947b5b9c4424 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -23,12 +23,17 @@ #include <asm/kmap_size.h> #endif +#ifdef CONFIG_PPC64 +#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) +#else +#define FIXADDR_SIZE 0 #ifdef CONFIG_KASAN #include <asm/kasan.h> #define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) #else #define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) #endif +#endif /* * Here we define all the compile-time 'special' virtual @@ -50,6 +55,7 @@ */ enum fixed_addresses { FIX_HOLE, +#ifdef CONFIG_PPC32 /* reserve the top 128K 
for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, @@ -72,6 +78,7 @@ enum fixed_addresses { FIX_IMMR_SIZE, #endif /* FIX_PCIE_MCFG, */ +#endif /* CONFIG_PPC32 */ __end_of_permanent_fixed_addresses, #define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) @@ -98,6 +105,8 @@ enum fixed_addresses { static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && __FIXADDR_SIZE > FIXADDR_SIZE); + if (__builtin_constant_p(idx)) BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index e93ee3202e4c..b3001f8b2c1e 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -33,9 +33,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; - if (!access_ok(uaddr, sizeof(u32))) + if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; - allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); switch (op) { case FUTEX_OP_SET: @@ -56,10 +55,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, default: ret = -ENOSYS; } + user_access_end(); *oval = oldval; - prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } @@ -70,11 +69,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, int ret = 0; u32 prev; - if (!access_ok(uaddr, sizeof(u32))) + if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; - allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); - __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -93,8 +90,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); + user_access_end(); + *uval = prev; - prevent_read_write_user(uaddr, 
uaddr, sizeof(*uaddr)); return ret; } diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index ed6086d57b22..443050906018 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -315,7 +315,8 @@ #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 #define H_RPT_INVALIDATE 0x448 -#define MAX_HCALL_OPCODE H_RPT_INVALIDATE +#define H_SCM_FLUSH 0x44C +#define MAX_HCALL_OPCODE H_SCM_FLUSH /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -389,6 +390,7 @@ #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 +#define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h index 999ed5ac9053..ccb2034506f0 100644 --- a/arch/powerpc/include/asm/hvconsole.h +++ b/arch/powerpc/include/asm/hvconsole.h @@ -24,5 +24,8 @@ extern int hvc_get_chars(uint32_t vtermno, char *buf, int count); extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count); +/* Provided by HVC VIO */ +void hvc_vio_init_early(void); + #endif /* __KERNEL__ */ #endif /* _PPC64_HVCONSOLE_H */ diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index ae02eb53d6ef..d024447283a0 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -94,8 +94,6 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_EXT7 18 /* Power Off Request */ #define HYDRA_INT_SPARE 19 -extern int hydra_init(void); - #endif /* __KERNEL__ */ #endif /* _ASMPPC_HYDRA_H */ diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index cc73c1267572..268d3bd073c8 100644 --- 
a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -4,6 +4,40 @@ #include <asm/ppc-opcode.h> +#ifdef CONFIG_PPC64 + +#define ___get_user_instr(gu_op, dest, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int __prefix, __suffix; \ + __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + if (__gui_ret == 0) { \ + if ((__prefix >> 26) == OP_PREFIX) { \ + __gui_ret = gu_op(__suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(__prefix, \ + __suffix); \ + } else { \ + __gui_inst = ppc_inst(__prefix); \ + } \ + if (__gui_ret == 0) \ + (dest) = __gui_inst; \ + } \ + __gui_ret; \ +}) +#else /* !CONFIG_PPC64 */ +#define ___get_user_instr(gu_op, dest, ptr) \ + gu_op((dest).val, (u32 __user *)(ptr)) +#endif /* CONFIG_PPC64 */ + +#define get_user_instr(x, ptr) \ + ___get_user_instr(get_user, x, ptr) + +#define __get_user_instr(x, ptr) \ + ___get_user_instr(__get_user, x, ptr) + /* * Instruction data type for POWER */ @@ -68,6 +102,8 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) #define ppc_inst(x) ((struct ppc_inst){ .val = x }) +#define ppc_inst_prefix(x, y) ppc_inst(x) + static inline bool ppc_inst_prefixed(struct ppc_inst x) { return false; @@ -113,13 +149,14 @@ static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *va return location + ppc_inst_len(tmp); } -static inline u64 ppc_inst_as_u64(struct ppc_inst x) +static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) { -#ifdef CONFIG_CPU_LITTLE_ENDIAN - return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); -#else - return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + return ppc_inst_val(x); + else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); + else + return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); } #define 
PPC_INST_STR_LEN sizeof("00000000 00000000") @@ -141,10 +178,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip); - -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src); +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src); #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index e8d09a841373..44cde2e129b8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -2,6 +2,70 @@ #ifndef _ASM_POWERPC_INTERRUPT_H #define _ASM_POWERPC_INTERRUPT_H +/* BookE/4xx */ +#define INTERRUPT_CRITICAL_INPUT 0x100 + +/* BookE */ +#define INTERRUPT_DEBUG 0xd00 +#ifdef CONFIG_BOOKE +#define INTERRUPT_PERFMON 0x260 +#define INTERRUPT_DOORBELL 0x280 +#endif + +/* BookS/4xx/8xx */ +#define INTERRUPT_MACHINE_CHECK 0x200 + +/* BookS/8xx */ +#define INTERRUPT_SYSTEM_RESET 0x100 + +/* BookS */ +#define INTERRUPT_DATA_SEGMENT 0x380 +#define INTERRUPT_INST_SEGMENT 0x480 +#define INTERRUPT_TRACE 0xd00 +#define INTERRUPT_H_DATA_STORAGE 0xe00 +#define INTERRUPT_HMI 0xe60 +#define INTERRUPT_H_FAC_UNAVAIL 0xf80 +#ifdef CONFIG_PPC_BOOK3S +#define INTERRUPT_DOORBELL 0xa00 +#define INTERRUPT_PERFMON 0xf00 +#define INTERRUPT_ALTIVEC_UNAVAIL 0xf20 +#endif + +/* BookE/BookS/4xx/8xx */ +#define INTERRUPT_DATA_STORAGE 0x300 +#define INTERRUPT_INST_STORAGE 0x400 +#define INTERRUPT_EXTERNAL 0x500 +#define INTERRUPT_ALIGNMENT 0x600 +#define INTERRUPT_PROGRAM 0x700 +#define INTERRUPT_SYSCALL 0xc00 +#define INTERRUPT_TRACE 0xd00 + +/* BookE/BookS/44x */ +#define INTERRUPT_FP_UNAVAIL 0x800 + +/* BookE/BookS/44x/8xx */ +#define INTERRUPT_DECREMENTER 0x900 + +#ifndef INTERRUPT_PERFMON +#define INTERRUPT_PERFMON 0x0 +#endif + +/* 8xx */ +#define INTERRUPT_SOFT_EMU_8xx 0x1000 +#define INTERRUPT_INST_TLB_MISS_8xx 0x1100 +#define 
INTERRUPT_DATA_TLB_MISS_8xx 0x1200 +#define INTERRUPT_INST_TLB_ERROR_8xx 0x1300 +#define INTERRUPT_DATA_TLB_ERROR_8xx 0x1400 +#define INTERRUPT_DATA_BREAKPOINT_8xx 0x1c00 +#define INTERRUPT_INST_BREAKPOINT_8xx 0x1d00 + +/* 603 */ +#define INTERRUPT_INST_TLB_MISS_603 0x1000 +#define INTERRUPT_DATA_LOAD_TLB_MISS_603 0x1100 +#define INTERRUPT_DATA_STORE_TLB_MISS_603 0x1200 + +#ifndef __ASSEMBLY__ + #include <linux/context_tracking.h> #include <linux/hardirq.h> #include <asm/cputime.h> @@ -9,10 +73,18 @@ #include <asm/kprobes.h> #include <asm/runlatch.h> -struct interrupt_state { -#ifdef CONFIG_PPC_BOOK3E_64 - enum ctx_state ctx_state; +static inline void nap_adjust_return(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_970_NAP + if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { + /* Can avoid a test-and-clear because NMIs do not call this */ + clear_thread_local_flags(_TLF_NAPPING); + regs->nip = (unsigned long)power4_idle_nap_return; + } #endif +} + +struct interrupt_state { }; static inline void booke_restore_dbcr0(void) @@ -29,10 +101,19 @@ static inline void booke_restore_dbcr0(void) static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { - /* - * Book3E reconciles irq soft mask in asm - */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC32 + if (!arch_irq_disabled_regs(regs)) + trace_hardirqs_off(); + + if (user_mode(regs)) { + kuep_lock(); + account_cpu_user_entry(); + } else { + kuap_save_and_lock(regs); + } +#endif + +#ifdef CONFIG_PPC64 if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; @@ -48,16 +129,12 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. 
*/ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() != CONTEXT_KERNEL); } #endif -#ifdef CONFIG_PPC_BOOK3E_64 - state->ctx_state = exception_enter(); - if (user_mode(regs)) - account_cpu_user_entry(); -#endif + booke_restore_dbcr0(); } /* @@ -76,23 +153,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { -#ifdef CONFIG_PPC_BOOK3E_64 - exception_exit(state->ctx_state); -#endif - - /* - * Book3S exits to user via interrupt_exit_user_prepare(), which does - * context tracking, which is a cleaner way to handle PREEMPT=y - * and avoid context entry/exit in e.g., preempt_schedule_irq()), - * which is likely to be where the core code wants to end up. - * - * The above comment explains why we can't do the - * - * if (user_mode(regs)) - * user_exit_irqoff(); - * - * sequence here. - */ + if (user_mode(regs)) + kuep_unlock(); } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) @@ -109,24 +171,46 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { + /* + * Adjust at exit so the main handler sees the true NIA. This must + * come before irq_exit() because irq_exit can enable interrupts, and + * if another interrupt is taken before nap_adjust_return has run + * here, then that interrupt would return directly to idle nap return. 
+ */ + nap_adjust_return(regs); + irq_exit(); interrupt_exit_prepare(regs, state); } struct interrupt_nmi_state { #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3S_64 u8 irq_soft_mask; u8 irq_happened; -#endif u8 ftrace_enabled; #endif }; +static inline bool nmi_disables_ftrace(struct pt_regs *regs) +{ + /* Allow DEC and PMI to be traced when they are soft-NMI */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + if (TRAP(regs) == INTERRUPT_DECREMENTER) + return false; + if (TRAP(regs) == INTERRUPT_PERFMON) + return false; + } + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + if (TRAP(regs) == INTERRUPT_PERFMON) + return false; + } + + return true; +} + static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) { #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3S_64 state->irq_soft_mask = local_paca->irq_soft_mask; state->irq_happened = local_paca->irq_happened; @@ -139,9 +223,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_happened |= PACA_IRQ_HARD_DIS; /* Don't do any per-CPU operations until interrupt state is fixed */ -#endif - /* Allow DEC and PMI to be traced when they are soft-NMI */ - if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) { + + if (nmi_disables_ftrace(regs)) { state->ftrace_enabled = this_cpu_get_ftrace_enabled(); this_cpu_set_ftrace_enabled(0); } @@ -164,17 +247,20 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter radix_enabled() || (mfmsr() & MSR_DR)) nmi_exit(); + /* + * nmi does not call nap_adjust_return because nmi should not create + * new work to do (must use irq_work for that). + */ + #ifdef CONFIG_PPC64 - if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) + if (nmi_disables_ftrace(regs)) this_cpu_set_ftrace_enabled(state->ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3S_64 /* Check we didn't change the pending interrupt mask. 
*/ WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); local_paca->irq_happened = state->irq_happened; local_paca->irq_soft_mask = state->irq_soft_mask; #endif -#endif } /* @@ -387,6 +473,7 @@ DECLARE_INTERRUPT_HANDLER(SMIException); DECLARE_INTERRUPT_HANDLER(handle_hmi_exception); DECLARE_INTERRUPT_HANDLER(unknown_exception); DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception); +DECLARE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception); DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception); DECLARE_INTERRUPT_HANDLER(RunModeException); DECLARE_INTERRUPT_HANDLER(single_step_exception); @@ -410,7 +497,7 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); DECLARE_INTERRUPT_HANDLER(CacheLockingException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); -DECLARE_INTERRUPT_HANDLER(WatchdogException); +DECLARE_INTERRUPT_HANDLER_NMI(WatchdogException); DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); /* slb.c */ @@ -421,7 +508,7 @@ DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); /* fault.c */ -DECLARE_INTERRUPT_HANDLER_RET(do_page_fault); +DECLARE_INTERRUPT_HANDLER(do_page_fault); DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv); /* process.c */ @@ -436,7 +523,7 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); -void unrecoverable_exception(struct pt_regs *regs); +void __noreturn unrecoverable_exception(struct pt_regs *regs); void replay_system_reset(void); void replay_soft_interrupts(void); @@ -447,4 +534,6 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index f3f264e441a7..b2bd58830430 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -53,8 +53,6 
@@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -void call_do_softirq(void *sp); -void call_do_irq(struct pt_regs *regs, void *sp); extern void do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 09297ec9fa52..2d5c6bec2b4f 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -20,7 +20,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" - JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".long 1b - ., %l[l_yes] - .\n\t" + JUMP_ENTRY_TYPE "%c0 - .\n\t" ".popsection \n\t" : : "i" (&((char *)key)[branch]) : : l_yes); @@ -34,7 +35,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" - JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".long 1b - ., %l[l_yes] - .\n\t" + JUMP_ENTRY_TYPE "%c0 - .\n\t" ".popsection \n\t" : : "i" (&((char *)key)[branch]) : : l_yes); @@ -43,23 +45,12 @@ l_yes: return true; } -#ifdef CONFIG_PPC64 -typedef u64 jump_label_t; -#else -typedef u32 jump_label_t; -#endif - -struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; -}; - #else #define ARCH_STATIC_BRANCH(LABEL, KEY) \ 1098: nop; \ .pushsection __jump_table, "aw"; \ - FTR_ENTRY_LONG 1098b, LABEL, KEY; \ + .long 1098b - ., LABEL - .; \ + FTR_ENTRY_LONG KEY; \ .popsection #endif diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 7355ed05e65e..3c478e5ef24c 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#if 
defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX) +#ifdef CONFIG_MODULES #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h new file mode 100644 index 000000000000..a9846b68c6b9 --- /dev/null +++ b/arch/powerpc/include/asm/kfence.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * powerpc KFENCE support. + * + * Copyright (C) 2020 CS GROUP France + */ + +#ifndef __ASM_POWERPC_KFENCE_H +#define __ASM_POWERPC_KFENCE_H + +#include <linux/mm.h> +#include <asm/pgtable.h> + +static inline bool arch_kfence_init_pool(void) +{ + return true; +} + +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + pte_t *kpte = virt_to_kpte(addr); + + if (protect) { + pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } else { + pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0); + } + + return true; +} + +#endif /* __ASM_POWERPC_KFENCE_H */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7ec21af49a45..ec96232529ac 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -28,15 +28,6 @@ #ifdef __ASSEMBLY__ #ifndef CONFIG_PPC_KUAP -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 -.endm - -.macro kuap_check current, gpr -.endm - .macro kuap_check_amr gpr1, gpr2 .endm @@ -55,6 +46,14 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) +void kuep_lock(void); +void kuep_unlock(void); +#else +static inline void kuep_lock(void) { } +static inline void kuep_unlock(void) { } +#endif + #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else @@ -66,7 +65,15 @@ 
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } -static inline void kuap_check_amr(void) { } +static inline void kuap_assert_locked(void) { } +static inline void kuap_save_and_lock(struct pt_regs *regs) { } +static inline void kuap_user_restore(struct pt_regs *regs) { } +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + return 0; +} /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 2f5f919f6cd3..c58121508157 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -258,6 +258,8 @@ extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm, extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa, struct kvm_memory_slot *memslot, unsigned long *map); +extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, + unsigned long lpcr); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8aacd76bb702..9531b1c1b190 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -767,8 +767,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index, unsigned long avpn); long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va); + unsigned long pte_index, unsigned long avpn); long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index); long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, diff --git a/arch/powerpc/include/asm/mmu_context.h 
b/arch/powerpc/include/asm/mmu_context.h index 652ce85f9410..4bc45d3ed8b0 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -263,7 +263,7 @@ extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE; + unsigned long vdso_base = (unsigned long)mm->context.vdso; if (start <= vdso_base && vdso_base < end) mm->context.vdso = NULL; diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 17a4a616436f..295ef5639609 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -7,33 +7,41 @@ #ifdef CONFIG_PPC_KUAP -#ifdef __ASSEMBLY__ - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */ - mfspr \gpr1, SPRN_MD_AP - mtspr SPRN_MD_AP, \gpr2 - stw \gpr1, STACK_REGS_KUAP(\sp) -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 - lwz \gpr1, STACK_REGS_KUAP(\sp) - mtspr SPRN_MD_AP, \gpr1 -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - mfspr \gpr, SPRN_MD_AP - rlwinm \gpr, \gpr, 16, 0xffff -999: twnei \gpr, MD_APG_KUAP@h - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #include <asm/reg.h> +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + regs->kuap = mfspr(SPRN_MD_AP); + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + mtspr(SPRN_MD_AP, regs->kuap); +} + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + unsigned long kuap = mfspr(SPRN_MD_AP); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + WARN_ON_ONCE(kuap >> 16 
!= MD_APG_KUAP >> 16); + + return kuap; +} + +static inline void kuap_assert_locked(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + kuap_get_and_assert_locked(); +} + static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 478249959baa..6e4faa0a9b35 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -172,6 +172,9 @@ #define mmu_linear_psize MMU_PAGE_8M +#define MODULES_VADDR (PAGE_OFFSET - SZ_256M) +#define MODULES_END PAGE_OFFSET + #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 6cb8aa357191..57cd3892bfe0 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -6,6 +6,8 @@ * the ppc64 non-hashed page table. 
*/ +#include <linux/sizes.h> + #include <asm/nohash/64/pgtable-4k.h> #include <asm/barrier.h> #include <asm/asm-const.h> @@ -54,7 +56,8 @@ #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) +#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE) +#define FIXADDR_SIZE SZ_32M /* diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9986ac34b8e2..c76157237e22 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -307,7 +307,7 @@ int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data, s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size); s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr); -s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr); +s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *addr); s64 opal_signal_system_reset(s32 cpu); s64 opal_quiesce(u64 shutdown_type, s32 cpu); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 00e7e671bb4b..f4c3428e816b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -43,7 +43,7 @@ struct power_pmu { u64 alt[]); void (*get_mem_data_src)(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); - void (*get_mem_weight)(u64 *weight); + void (*get_mem_weight)(u64 *weight, u64 type); unsigned long group_constraint_mask; unsigned long group_constraint_val; u64 (*bhrb_filter_map)(u64 branch_sample_type); @@ -67,6 +67,12 @@ struct power_pmu { * the pmu supports extended perf regs capability */ int capabilities; + /* + * Function to check event code for values which are + * reserved. 
Function takes struct perf_event as input, + * since event code could be spread in attr.config* + */ + int (*check_attr_config)(struct perf_event *ev); }; /* diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 4eed82172e33..c6a676714f04 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,8 +41,6 @@ struct mm_struct; #ifndef __ASSEMBLY__ -#include <asm/tlbflush.h> - /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ed161ef2b3ca..ac41776661e9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -265,6 +265,7 @@ #define PPC_INST_ORI 0x60000000 #define PPC_INST_ORIS 0x64000000 #define PPC_INST_BRANCH 0x48000000 +#define PPC_INST_BL 0x48000001 #define PPC_INST_BRANCH_COND 0x40800000 /* Prefixes */ @@ -437,6 +438,9 @@ #define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_LVX(t, a, b) (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_STVX(s, a, b) (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDE(t, a, b) (0x7c000114 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDZE(t, a) (0x7c000194 | ___PPC_RT(t) | ___PPC_RA(a)) +#define PPC_RAW_ADDME(t, a) (0x7c0001d4 | ___PPC_RT(t) | ___PPC_RA(a)) #define PPC_RAW_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) @@ -445,11 +449,14 @@ #define PPC_RAW_BLR() (PPC_INST_BLR) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) +#define 
PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) #define PPC_RAW_BCTR() (PPC_INST_BCTR) #define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) #define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) #define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) #define PPC_RAW_STDX(r, base, b) (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_STDU(r, base, i) (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc)) @@ -472,6 +479,10 @@ #define PPC_RAW_CMPLW(a, b) (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_CMPLD(a, b) (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_SUB(d, a, b) (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b)) +#define PPC_RAW_SUBFC(d, a, b) (0x7c000010 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUBFE(d, a, b) (0x7c000110 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUBFIC(d, a, i) (0x20000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_SUBFZE(d, a) (0x7c000190 | ___PPC_RT(d) | ___PPC_RA(a)) #define PPC_RAW_MULD(d, a, b) (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) @@ -484,11 +495,13 @@ #define PPC_RAW_DIVDEU_DOT(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_AND(d, a, b) (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_ANDI(d, a, i) (0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ANDIS(d, a, i) (0x74000000 | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) #define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3dceb64fc9af..d6739d700f0a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -16,36 +16,6 @@ #define SZL (BITS_PER_LONG/8) /* - * Stuff for accurate CPU time accounting. - * These macros handle transitions between user and system state - * in exception entry and exit and accumulate time to the - * user_time and system_time fields in the paca. 
- */ - -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) -#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) -#else -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \ - MFTB(ra); /* get timebase */ \ - PPC_LL rb, ACCOUNT_STARTTIME_USER(ptr); \ - PPC_STL ra, ACCOUNT_STARTTIME(ptr); \ - subf rb,rb,ra; /* subtract start value */ \ - PPC_LL ra, ACCOUNT_USER_TIME(ptr); \ - add ra,ra,rb; /* add on to user time */ \ - PPC_STL ra, ACCOUNT_USER_TIME(ptr); \ - -#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) \ - MFTB(ra); /* get timebase */ \ - PPC_LL rb, ACCOUNT_STARTTIME(ptr); \ - PPC_STL ra, ACCOUNT_STARTTIME_USER(ptr); \ - subf rb,rb,ra; /* subtract start value */ \ - PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \ - add ra,ra,rb; /* add on to system time */ \ - PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - -/* * Macros for storing registers into and loading registers from * exception frames. */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8acc3590c971..7bf8a15af224 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -144,15 +144,12 @@ struct thread_struct { #endif #ifdef CONFIG_PPC32 void *pgdir; /* root of page-table tree */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif -#endif #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) unsigned long kuap; /* opened segments for user access */ #endif -#ifdef CONFIG_VMAP_STACK unsigned long srr0; unsigned long srr1; unsigned long dar; @@ -161,7 +158,7 @@ struct thread_struct { unsigned long r0, r3, r4, r5, r6, r8, r9, r11; unsigned long lr, ctr; #endif -#endif +#endif /* CONFIG_PPC32 */ /* Debug Registers */ struct debug_reg debug; #ifdef CONFIG_PPC_FPU_REGS @@ -282,7 +279,6 @@ struct thread_struct { #ifdef CONFIG_PPC32 #define INIT_THREAD { \ .ksp = INIT_SP, \ - 
.ksp_limit = INIT_SP_LIMIT, \ .pgdir = swapper_pg_dir, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ @@ -393,6 +389,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); extern unsigned long isa206_idle_insn_mayloss(unsigned long type); #ifdef CONFIG_PPC_970_NAP extern void power4_idle_nap(void); +void power4_idle_nap_return(void); #endif extern unsigned long cpuidle_disable; @@ -417,6 +414,8 @@ extern int fix_alignment(struct pt_regs *); #define NET_IP_ALIGN 0 #endif +int do_mathemu(struct pt_regs *regs); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 1499e928ea6a..9c9ab2746168 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -185,44 +185,27 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define current_pt_regs() \ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) +/* + * The 4 low bits (0xf) are available as flags to overload the trap word, + * because interrupt vectors have minimum alignment of 0x10. TRAP_FLAGS_MASK + * must cover the bits used as flags, including bit 0 which is used as the + * "norestart" bit. + */ #ifdef __powerpc64__ -#ifdef CONFIG_PPC_BOOK3S -#define TRAP_FLAGS_MASK 0x10 -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) true -#define SET_FULL_REGS(regs) do { } while (0) -#else -#define TRAP_FLAGS_MASK 0x11 -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) -#endif -#define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) -#define NV_REG_POISON 0xdeadbeefdeadbeefUL +#define TRAP_FLAGS_MASK 0x1 #else /* - * We use the least-significant bit of the trap field to indicate - * whether we have saved the full set of registers, or only a - * partial set. 
A 1 there means the partial set. - * On 4xx we use the next bit to indicate whether the exception + * On 4xx we use bit 1 in the trap word to indicate whether the exception * is a critical exception (1 means it is). */ -#define TRAP_FLAGS_MASK 0x1F -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) +#define TRAP_FLAGS_MASK 0xf #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) -#define NV_REG_POISON 0xdeadbeef -#define CHECK_FULL_REGS(regs) \ -do { \ - if ((regs)->trap & 1) \ - printk(KERN_CRIT "%s: partial register set\n", __func__); \ -} while (0) #endif /* __powerpc64__ */ +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -static inline void set_trap(struct pt_regs *regs, unsigned long val) +static __always_inline void set_trap(struct pt_regs *regs, unsigned long val) { regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK); } @@ -244,12 +227,12 @@ static inline bool trap_is_syscall(struct pt_regs *regs) static inline bool trap_norestart(struct pt_regs *regs) { - return regs->trap & 0x10; + return regs->trap & 0x1; } -static inline void set_trap_norestart(struct pt_regs *regs) +static __always_inline void set_trap_norestart(struct pt_regs *regs) { - regs->trap |= 0x10; + regs->trap |= 0x1; } #define arch_has_single_step() (1) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index b752d34517b3..07318bc63e3d 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -44,20 +44,6 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) } #define queued_spin_lock queued_spin_lock -#define smp_mb__after_spinlock() smp_mb() - -static __always_inline int queued_spin_is_locked(struct qspinlock *lock) -{ - /* - * This barrier was added to simple 
spinlocks by commit 51d7d5205d338, - * but it should now be possible to remove it, asm arm64 has done with - * commit c6f5d02b6a0f. - */ - smp_mb(); - return atomic_read(&lock->val); -} -#define queued_spin_is_locked queued_spin_is_locked - #ifdef CONFIG_PARAVIRT_SPINLOCKS #define SPIN_THRESHOLD (1<<15) /* not tuned */ @@ -86,6 +72,13 @@ static inline void pv_spinlocks_init(void) #endif +/* + * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait, + * which was found to have performance problems if implemented with + * the preferred spin_begin()/spin_end() SMT priority pattern. Use the + * generic version instead. + */ + #include <asm-generic/qspinlock.h> #endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index da103e92c112..7c81d3e563b2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -124,7 +124,7 @@ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? 
*/ #else -#define MSR_TM_ACTIVE(x) 0 +#define MSR_TM_ACTIVE(x) ((void)(x), 0) #endif #if defined(CONFIG_PPC_BOOK3S_64) @@ -441,6 +441,7 @@ #define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000) #define LPCR_RMLS 0x1C000000 /* Implementation dependent RMO limit sel */ #define LPCR_RMLS_SH 26 +#define LPCR_HAIL ASM_CONST(0x0000000004000000) /* HV AIL (ISAv3.1) */ #define LPCR_ILE ASM_CONST(0x0000000002000000) /* !HV irqs set MSR:LE */ #define LPCR_AIL ASM_CONST(0x0000000001800000) /* Alternate interrupt location */ #define LPCR_AIL_0 ASM_CONST(0x0000000000000000) /* MMU off exception offset 0x0 */ @@ -1393,8 +1394,7 @@ static inline void mtmsr_isync(unsigned long val) : "r" ((unsigned long)(v)) \ : "memory") #endif -#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",0" : \ - : : "memory") +#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",2" : : : "memory") static inline void wrtee(unsigned long val) { diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 658448ca5b8a..9dc97d2f9d27 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -19,8 +19,8 @@ #define RTAS_UNKNOWN_SERVICE (-1) #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */ -/* Buffer size for ppc_rtas system call. */ -#define RTAS_RMOBUF_MAX (64 * 1024) +/* Memory set aside for sys_rtas to use with calls that need a work area. 
*/ +#define RTAS_USER_REGION_SIZE (64 * 1024) /* RTAS return status codes */ #define RTAS_BUSY -2 /* RTAS Busy */ @@ -357,7 +357,7 @@ extern void rtas_take_timebase(void); static inline int page_is_rtas_user_buf(unsigned long pfn) { unsigned long paddr = (pfn << PAGE_SHIFT); - if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_RMOBUF_MAX)) + if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_USER_REGION_SIZE)) return 1; return 0; } diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 5b862de29dff..552f325412cc 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -38,8 +38,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - smp_mb(); - return !arch_spin_value_unlocked(*lock); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } /* @@ -282,7 +281,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #define arch_read_relax(lock) rw_yield(lock) #define arch_write_relax(lock) rw_yield(lock) -/* See include/linux/spinlock.h */ -#define smp_mb__after_spinlock() smp_mb() - #endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 7a13bc20f0a0..03b3d010cbab 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; extern int cpu_to_chip_id(int cpu); +extern int *chip_id_lookup_table; #ifdef CONFIG_SMP @@ -121,6 +122,11 @@ static inline struct cpumask *cpu_sibling_mask(int cpu) return per_cpu(cpu_sibling_map, cpu); } +static inline struct cpumask *cpu_core_mask(int cpu) +{ + return per_cpu(cpu_core_map, cpu); +} + static inline struct cpumask *cpu_l2_cache_mask(int cpu) { return per_cpu(cpu_l2_cache_map, cpu); diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h index 6ec72282888d..bd75872a6334 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -10,6 +10,9 @@ #include <asm/simple_spinlock.h> #endif +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + #ifndef CONFIG_PARAVIRT_SPINLOCKS static inline void pv_spinlocks_init(void) { } #endif diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 386d576673a1..b4ec6c7dd72e 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -38,7 +38,6 @@ #ifndef __ASSEMBLY__ #include <linux/cache.h> #include <asm/processor.h> -#include <asm/page.h> #include <asm/accounting.h> #define SLB_PRELOAD_NR 16U @@ -152,6 +151,12 @@ void arch_setup_new_exec(void); #ifndef __ASSEMBLY__ +static inline void clear_thread_local_flags(unsigned int flags) +{ + struct thread_info *ti = current_thread_info(); + ti->local_flags &= ~flags; +} + static inline bool test_thread_local_flags(unsigned int flags) { struct thread_info *ti = current_thread_info(); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 3beeb030cd78..e4db64c0e184 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -126,7 +126,7 @@ static inline int cpu_to_coregroup_id(int cpu) #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) +#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 78e2a3990eab..a09e4240c5b1 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -43,129 +43,39 @@ static inline bool __access_ok(unsigned long addr, unsigned 
long size) * exception handling means that it's no longer "just"...) * */ -#define get_user(x, ptr) \ - __get_user_check((x), (ptr), sizeof(*(ptr))) -#define put_user(x, ptr) \ - __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) -#define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __get_user_allowed(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) - -#define __get_user_inatomic(x, ptr) \ - __get_user_nosleep((x), (ptr), sizeof(*(ptr))) -#define __put_user_inatomic(x, ptr) \ - __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#ifdef CONFIG_PPC64 - -#define ___get_user_instr(gu_op, dest, ptr) \ -({ \ - long __gui_ret = 0; \ - unsigned long __gui_ptr = (unsigned long)ptr; \ - struct ppc_inst __gui_inst; \ - unsigned int __prefix, __suffix; \ - __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ - if (__gui_ret == 0) { \ - if ((__prefix >> 26) == OP_PREFIX) { \ - __gui_ret = gu_op(__suffix, \ - (unsigned int __user *)__gui_ptr + 1); \ - __gui_inst = ppc_inst_prefix(__prefix, \ - __suffix); \ - } else { \ - __gui_inst = ppc_inst(__prefix); \ - } \ - if (__gui_ret == 0) \ - (dest) = __gui_inst; \ - } \ - __gui_ret; \ -}) - -#define get_user_instr(x, ptr) \ - ___get_user_instr(get_user, x, ptr) - -#define __get_user_instr(x, ptr) \ - ___get_user_instr(__get_user, x, ptr) - -#define __get_user_instr_inatomic(x, ptr) \ - ___get_user_instr(__get_user_inatomic, x, ptr) - -#else /* !CONFIG_PPC64 */ -#define get_user_instr(x, ptr) \ - get_user((x).val, (u32 __user *)(ptr)) - -#define __get_user_instr(x, ptr) \ - __get_user_nocheck((x).val, (u32 __user *)(ptr), sizeof(u32), true) - -#define __get_user_instr_inatomic(x, ptr) \ - __get_user_nosleep((x).val, (u32 __user *)(ptr), sizeof(u32)) - -#endif /* CONFIG_PPC64 */ - -extern long __put_user_bad(void); - -#define 
__put_user_size(x, ptr, size, retval) \ -do { \ - __label__ __pu_failed; \ - \ - retval = 0; \ - allow_write_to_user(ptr, size); \ - __put_user_size_goto(x, ptr, size, __pu_failed); \ - prevent_write_to_user(ptr, size); \ - break; \ - \ -__pu_failed: \ - retval = -EFAULT; \ - prevent_write_to_user(ptr, size); \ -} while (0) - -#define __put_user_nocheck(x, ptr, size) \ +#define __put_user(x, ptr) \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ + __typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x); \ + __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr)); \ \ - if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_fault(); \ - __chk_user_ptr(__pu_addr); \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + might_fault(); \ + do { \ + __label__ __pu_failed; \ + \ + allow_write_to_user(__pu_addr, __pu_size); \ + __put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed); \ + prevent_write_to_user(__pu_addr, __pu_size); \ + __pu_err = 0; \ + break; \ + \ +__pu_failed: \ + prevent_write_to_user(__pu_addr, __pu_size); \ + __pu_err = -EFAULT; \ + } while (0); \ \ __pu_err; \ }) -#define __put_user_check(x, ptr, size) \ +#define put_user(x, ptr) \ ({ \ - long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ - \ - might_fault(); \ - if (access_ok(__pu_addr, __pu_size)) \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + __typeof__(*(ptr)) __user *_pu_addr = (ptr); \ \ - __pu_err; \ + access_ok(_pu_addr, sizeof(*(ptr))) ? 
\ + __put_user(x, _pu_addr) : -EFAULT; \ }) -#define __put_user_nosleep(x, ptr, size) \ -({ \ - long __pu_err; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ - \ - __chk_user_ptr(__pu_addr); \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ - \ - __pu_err; \ -}) - - /* * We don't tell gcc that we are accessing memory, but this is OK * because we do not write to any memory gcc knows about, so there @@ -198,25 +108,17 @@ __pu_failed: \ #define __put_user_size_goto(x, ptr, size, label) \ do { \ + __typeof__(*(ptr)) __user *__pus_addr = (ptr); \ + \ switch (size) { \ - case 1: __put_user_asm_goto(x, ptr, label, "stb"); break; \ - case 2: __put_user_asm_goto(x, ptr, label, "sth"); break; \ - case 4: __put_user_asm_goto(x, ptr, label, "stw"); break; \ - case 8: __put_user_asm2_goto(x, ptr, label); break; \ - default: __put_user_bad(); \ + case 1: __put_user_asm_goto(x, __pus_addr, label, "stb"); break; \ + case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break; \ + case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break; \ + case 8: __put_user_asm2_goto(x, __pus_addr, label); break; \ + default: BUILD_BUG(); \ } \ } while (0) -#define __unsafe_put_user_goto(x, ptr, size, label) \ -do { \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - __put_user_size_goto((x), __pu_addr, (size), label); \ -} while (0) - - -extern long __get_user_bad(void); - /* * This does an atomic 128 byte aligned load from userspace. * Upto caller to do enable_kernel_vmx() before calling! 
@@ -234,6 +136,59 @@ extern long __get_user_bad(void); : "=r" (err) \ : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err)) +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define __get_user_asm_goto(x, addr, label, op) \ + asm_volatile_goto( \ + "1: "op"%U1%X1 %0, %1 # get_user\n" \ + EX_TABLE(1b, %l2) \ + : "=r" (x) \ + : "m"UPD_CONSTR (*addr) \ + : \ + : label) + +#ifdef __powerpc64__ +#define __get_user_asm2_goto(x, addr, label) \ + __get_user_asm_goto(x, addr, label, "ld") +#else /* __powerpc64__ */ +#define __get_user_asm2_goto(x, addr, label) \ + asm_volatile_goto( \ + "1: lwz%X1 %0, %1\n" \ + "2: lwz%X1 %L0, %L1\n" \ + EX_TABLE(1b, %l2) \ + EX_TABLE(2b, %l2) \ + : "=r" (x) \ + : "m" (*addr) \ + : \ + : label) +#endif /* __powerpc64__ */ + +#define __get_user_size_goto(x, ptr, size, label) \ +do { \ + BUILD_BUG_ON(size > sizeof(x)); \ + switch (size) { \ + case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break; \ + case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break; \ + case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break; \ + case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label); break; \ + default: x = 0; BUILD_BUG(); \ + } \ +} while (0) + +#define __get_user_size_allowed(x, ptr, size, retval) \ +do { \ + __label__ __gus_failed; \ + \ + __get_user_size_goto(x, ptr, size, __gus_failed); \ + retval = 0; \ + break; \ +__gus_failed: \ + x = 0; \ + retval = -EFAULT; \ +} while (0) + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op"%U2%X2 %1, %2 # get_user\n" \ @@ -271,25 +226,27 @@ extern long __get_user_bad(void); #define __get_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ - __chk_user_ptr(ptr); \ - if (size > sizeof(x)) \ - (x) = __get_user_bad(); \ + BUILD_BUG_ON(size > sizeof(x)); \ switch (size) { \ case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, (u16 __user *)ptr, retval, 
"lhz"); break; \ case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \ case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \ - default: (x) = __get_user_bad(); \ + default: x = 0; BUILD_BUG(); \ } \ } while (0) -#define __get_user_size(x, ptr, size, retval) \ +#define __get_user_size_goto(x, ptr, size, label) \ do { \ - allow_read_from_user(ptr, size); \ - __get_user_size_allowed(x, ptr, size, retval); \ - prevent_read_from_user(ptr, size); \ + long __gus_retval; \ + \ + __get_user_size_allowed(x, ptr, size, __gus_retval); \ + if (__gus_retval) \ + goto label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. @@ -297,86 +254,36 @@ do { \ #define __long_type(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) -#define __get_user_nocheck(x, ptr, size, do_allow) \ +#define __get_user(x, ptr) \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ + __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \ \ - __chk_user_ptr(__gu_addr); \ - if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \ - might_fault(); \ - if (do_allow) \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - else \ - __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ + might_fault(); \ + allow_read_from_user(__gu_addr, __gu_size); \ + __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ + prevent_read_from_user(__gu_addr, __gu_size); \ (x) = (__typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ }) -#define __get_user_check(x, ptr, size) \ +#define get_user(x, ptr) \ ({ \ - long __gu_err = -EFAULT; \ - __long_type(*(ptr)) __gu_val = 0; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ - \ - might_fault(); \ - if (access_ok(__gu_addr, __gu_size)) \ - 
__get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + __typeof__(*(ptr)) __user *_gu_addr = (ptr); \ \ - __gu_err; \ + access_ok(_gu_addr, sizeof(*(ptr))) ? \ + __get_user(x, _gu_addr) : \ + ((x) = (__force __typeof__(*(ptr)))0, -EFAULT); \ }) -#define __get_user_nosleep(x, ptr, size) \ -({ \ - long __gu_err; \ - __long_type(*(ptr)) __gu_val; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ - \ - __chk_user_ptr(__gu_addr); \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ - \ - __gu_err; \ -}) - - /* more complex routines */ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifdef CONFIG_ARCH_HAS_COPY_MC -unsigned long __must_check -copy_mc_generic(void *to, const void *from, unsigned long size); - -static inline unsigned long __must_check -copy_mc_to_kernel(void *to, const void *from, unsigned long size) -{ - return copy_mc_generic(to, from, size); -} -#define copy_mc_to_kernel copy_mc_to_kernel - -static inline unsigned long __must_check -copy_mc_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(check_copy_size(from, n, true))) { - if (access_ok(to, n)) { - allow_write_to_user(to, n); - n = copy_mc_generic((void *)to, from, n); - prevent_write_to_user(to, n); - } - } - - return n; -} -#endif - #ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) @@ -414,26 +321,51 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) unsigned long __arch_clear_user(void __user *addr, unsigned long size); -static inline unsigned long clear_user(void __user *addr, unsigned long size) +static inline unsigned long __clear_user(void __user *addr, unsigned long size) { - unsigned long ret = size; + unsigned long ret; + might_fault(); - if 
(likely(access_ok(addr, size))) { - allow_write_to_user(addr, size); - ret = __arch_clear_user(addr, size); - prevent_write_to_user(addr, size); - } + allow_write_to_user(addr, size); + ret = __arch_clear_user(addr, size); + prevent_write_to_user(addr, size); return ret; } -static inline unsigned long __clear_user(void __user *addr, unsigned long size) +static inline unsigned long clear_user(void __user *addr, unsigned long size) { - return clear_user(addr, size); + return likely(access_ok(addr, size)) ? __clear_user(addr, size) : size; } extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_generic(void *to, const void *from, unsigned long size); + +static inline unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned long size) +{ + return copy_mc_generic(to, from, size); +} +#define copy_mc_to_kernel copy_mc_to_kernel + +static inline unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(check_copy_size(from, n, true))) { + if (access_ok(to, n)) { + allow_write_to_user(to, n); + n = copy_mc_generic((void *)to, from, n); + prevent_write_to_user(to, n); + } + } + + return n; +} +#endif + extern long __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size); extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, @@ -482,10 +414,37 @@ user_write_access_begin(const void __user *ptr, size_t len) #define user_write_access_begin user_write_access_begin #define user_write_access_end prevent_current_write_to_user -#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) -#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) +#define unsafe_get_user(x, p, e) do { \ + __long_type(*(p)) __gu_val; \ + __typeof__(*(p)) __user *__gu_addr = (p); \ + 
\ + __get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \ + (x) = (__typeof__(*(p)))__gu_val; \ +} while (0) + #define unsafe_put_user(x, p, e) \ - __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) + __put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) + +#define unsafe_copy_from_user(d, s, l, e) \ +do { \ + u8 *_dst = (u8 *)(d); \ + const u8 __user *_src = (const u8 __user *)(s); \ + size_t _len = (l); \ + int _i; \ + \ + for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \ + unsafe_get_user(*(u64 *)(_dst + _i), (u64 __user *)(_src + _i), e); \ + if (_len & 4) { \ + unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e); \ + _i += 4; \ + } \ + if (_len & 2) { \ + unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e); \ + _i += 2; \ + } \ + if (_len & 1) \ + unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e); \ +} while (0) #define unsafe_copy_to_user(d, s, l, e) \ do { \ @@ -494,9 +453,9 @@ do { \ size_t _len = (l); \ int _i; \ \ - for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ - unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \ - if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ + for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \ + unsafe_put_user(*(u64 *)(_src + _i), (u64 __user *)(_dst + _i), e); \ + if (_len & 4) { \ unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ _i += 4; \ } \ @@ -511,14 +470,8 @@ do { \ #define HAVE_GET_KERNEL_NOFAULT #define __get_kernel_nofault(dst, src, type, err_label) \ -do { \ - int __kr_err; \ - \ - __get_user_size_allowed(*((type *)(dst)), (__force type __user *)(src),\ - sizeof(type), __kr_err); \ - if (unlikely(__kr_err)) \ - goto err_label; \ -} while (0) + __get_user_size_goto(*((type *)(dst)), \ + (__force type __user *)(src), sizeof(type), err_label) #define __put_kernel_nofault(dst, src, type, err_label) \ __put_user_size_goto(*((type *)(src)), \ diff 
--git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 700fcdac2e3c..b541c690a31c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -40,6 +40,7 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #ifdef CONFIG_PPC32 #define __ARCH_WANT_OLD_STAT +#define __ARCH_WANT_SYS_OLD_SELECT #endif #ifdef CONFIG_PPC64 #define __ARCH_WANT_SYS_TIME diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 77c635c2c90d..1faff0be1111 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#include <asm/page.h> + #ifdef __ASSEMBLY__ #include <asm/ppc_asm.h> @@ -154,6 +156,14 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *__arch_get_vdso_data(void); +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return (void *)vd + PAGE_SIZE; +} +#endif + static inline bool vdso_clocksource_ok(const struct vdso_data *vd) { return true; diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 3f958ecf2beb..a585c8e538ff 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -107,9 +107,7 @@ extern struct vdso_arch_data *vdso_data; bcl 20, 31, .+4 999: mflr \ptr -#if CONFIG_PPC_PAGE_SHIFT > 14 addis \ptr, \ptr, (_vdso_datapage - 999b)@ha -#endif addi \ptr, \ptr, (_vdso_datapage - 999b)@l .endm diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 721c0d6715ac..e7479a4abf96 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -114,6 +114,7 @@ struct vio_driver { const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); 
void (*remove)(struct vio_dev *dev); + void (*shutdown)(struct vio_dev *dev); /* A driver must have a get_desired_dma() function to * be loaded in a CMO environment if it uses DMA. */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 9a312b975ca8..aa094a8655b0 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -102,6 +102,7 @@ void xive_flush_interrupt(void); /* xmon hook */ void xmon_xive_do_dump(int cpu); int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d); +void xmon_xive_get_irq_all(void); /* APIs used by KVM */ u32 xive_native_default_eq_shift(void); diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h index cc79856896a1..4ba87de32be0 100644 --- a/arch/powerpc/include/uapi/asm/errno.h +++ b/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include <asm-generic/errno.h> #undef EDEADLOCK diff --git a/arch/powerpc/include/uapi/asm/posix_types.h b/arch/powerpc/include/uapi/asm/posix_types.h index f698400e4bb0..9c0342312544 100644 --- a/arch/powerpc/include/uapi/asm/posix_types.h +++ b/arch/powerpc/include/uapi/asm/posix_types.h @@ -12,11 +12,6 @@ typedef unsigned long __kernel_old_dev_t; #define __kernel_old_dev_t __kernel_old_dev_t #else -typedef unsigned int __kernel_size_t; -typedef int __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -#define __kernel_size_t __kernel_size_t - typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index c7797eb958c7..bbb4181621dd 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -107,7 +107,6 @@ static struct aligninfo spe_aligninfo[32] = { static int emulate_spe(struct pt_regs *regs, unsigned int reg, struct ppc_inst ppc_instr) { - int ret; union { u64 ll; u32 w[2]; @@ -127,11 +126,6 @@ static int 
emulate_spe(struct pt_regs *regs, unsigned int reg, nb = spe_aligninfo[instr].len; flags = spe_aligninfo[instr].flags; - /* Verify the address of the operand */ - if (unlikely(user_mode(regs) && - !access_ok(addr, nb))) - return -EFAULT; - /* userland only */ if (unlikely(!user_mode(regs))) return 0; @@ -169,26 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } } else { temp.ll = data.ll = 0; - ret = 0; p = addr; + if (!user_read_access_begin(addr, nb)) + return -EFAULT; + switch (nb) { case 8: - ret |= __get_user_inatomic(temp.v[0], p++); - ret |= __get_user_inatomic(temp.v[1], p++); - ret |= __get_user_inatomic(temp.v[2], p++); - ret |= __get_user_inatomic(temp.v[3], p++); + unsafe_get_user(temp.v[0], p++, Efault_read); + unsafe_get_user(temp.v[1], p++, Efault_read); + unsafe_get_user(temp.v[2], p++, Efault_read); + unsafe_get_user(temp.v[3], p++, Efault_read); fallthrough; case 4: - ret |= __get_user_inatomic(temp.v[4], p++); - ret |= __get_user_inatomic(temp.v[5], p++); + unsafe_get_user(temp.v[4], p++, Efault_read); + unsafe_get_user(temp.v[5], p++, Efault_read); fallthrough; case 2: - ret |= __get_user_inatomic(temp.v[6], p++); - ret |= __get_user_inatomic(temp.v[7], p++); - if (unlikely(ret)) - return -EFAULT; + unsafe_get_user(temp.v[6], p++, Efault_read); + unsafe_get_user(temp.v[7], p++, Efault_read); } + user_read_access_end(); switch (instr) { case EVLDD: @@ -255,31 +250,41 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, /* Store result to memory or update registers */ if (flags & ST) { - ret = 0; p = addr; + + if (!user_write_access_begin(addr, nb)) + return -EFAULT; + switch (nb) { case 8: - ret |= __put_user_inatomic(data.v[0], p++); - ret |= __put_user_inatomic(data.v[1], p++); - ret |= __put_user_inatomic(data.v[2], p++); - ret |= __put_user_inatomic(data.v[3], p++); + unsafe_put_user(data.v[0], p++, Efault_write); + unsafe_put_user(data.v[1], p++, Efault_write); + unsafe_put_user(data.v[2], p++, 
Efault_write); + unsafe_put_user(data.v[3], p++, Efault_write); fallthrough; case 4: - ret |= __put_user_inatomic(data.v[4], p++); - ret |= __put_user_inatomic(data.v[5], p++); + unsafe_put_user(data.v[4], p++, Efault_write); + unsafe_put_user(data.v[5], p++, Efault_write); fallthrough; case 2: - ret |= __put_user_inatomic(data.v[6], p++); - ret |= __put_user_inatomic(data.v[7], p++); + unsafe_put_user(data.v[6], p++, Efault_write); + unsafe_put_user(data.v[7], p++, Efault_write); } - if (unlikely(ret)) - return -EFAULT; + user_write_access_end(); } else { *evr = data.w[0]; regs->gpr[reg] = data.w[1]; } return 1; + +Efault_read: + user_read_access_end(); + return -EFAULT; + +Efault_write: + user_write_access_end(); + return -EFAULT; } #endif /* CONFIG_SPE */ @@ -299,13 +304,12 @@ int fix_alignment(struct pt_regs *regs) struct instruction_op op; int r, type; - /* - * We require a complete register set, if not, then our assembly - * is broken - */ - CHECK_FULL_REGS(regs); + if (is_kernel_addr(regs->nip)) + r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip); + else + r = __get_user_instr(instr, (void __user *)regs->nip); - if (unlikely(__get_user_instr(instr, (void __user *)regs->nip))) + if (unlikely(r)) return -EFAULT; if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { /* We don't handle PPC little-endian any more... 
*/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f3a662201a9f..28af4efb4587 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -91,7 +91,6 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); #else - OFFSET(KSP_LIMIT, thread_struct, ksp_limit); #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -132,7 +131,6 @@ int main(void) OFFSET(KSP_VSID, thread_struct, ksp_vsid); #else /* CONFIG_PPC64 */ OFFSET(PGDIR, thread_struct, pgdir); -#ifdef CONFIG_VMAP_STACK OFFSET(SRR0, thread_struct, srr0); OFFSET(SRR1, thread_struct, srr1); OFFSET(DAR, thread_struct, dar); @@ -149,7 +147,6 @@ int main(void) OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); #endif -#endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); @@ -285,21 +282,11 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); - OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime); - OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user); - OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); - OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); #ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime); - OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user); - OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime); - OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime); -#endif #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -323,9 +310,6 @@ int main(void) STACK_PT_REGS_OFFSET(GPR11, gpr[11]); STACK_PT_REGS_OFFSET(GPR12, gpr[12]); STACK_PT_REGS_OFFSET(GPR13, 
gpr[13]); -#ifndef CONFIG_PPC64 - STACK_PT_REGS_OFFSET(GPR14, gpr[14]); -#endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special * register names @@ -381,7 +365,6 @@ int main(void) DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); - DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif #endif diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index cd60bc1c8701..f24cd53ff26e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -362,14 +362,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) pa = pte_pfn(*ptep); /* On radix we can do hugepage mappings for io, so handle that */ - if (hugepage_shift) { - pa <<= hugepage_shift; - pa |= token & ((1ul << hugepage_shift) - 1); - } else { - pa <<= PAGE_SHIFT; - pa |= token & (PAGE_SIZE - 1); - } + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + pa <<= PAGE_SHIFT; + pa |= token & ((1ul << hugepage_shift) - 1); return pa; } @@ -779,7 +776,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat default: eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); return -EINVAL; - }; + } return 0; } @@ -1568,6 +1565,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, } EXPORT_SYMBOL_GPL(eeh_pe_inject_err); +#ifdef CONFIG_PROC_FS static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { @@ -1594,6 +1592,7 @@ static int proc_eeh_show(struct seq_file *m, void *v) return 0; } +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 78c430b7f9d9..9160285cb2f4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,195 +48,16 
@@ */ .align 12 -#ifdef CONFIG_BOOKE - .globl mcheck_transfer_to_handler -mcheck_transfer_to_handler: - mfspr r0,SPRN_DSRR0 - stw r0,_DSRR0(r11) - mfspr r0,SPRN_DSRR1 - stw r0,_DSRR1(r11) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) - - .globl debug_transfer_to_handler -debug_transfer_to_handler: - mfspr r0,SPRN_CSRR0 - stw r0,_CSRR0(r11) - mfspr r0,SPRN_CSRR1 - stw r0,_CSRR1(r11) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) - - .globl crit_transfer_to_handler -crit_transfer_to_handler: -#ifdef CONFIG_PPC_BOOK3E_MMU - mfspr r0,SPRN_MAS0 - stw r0,MAS0(r11) - mfspr r0,SPRN_MAS1 - stw r0,MAS1(r11) - mfspr r0,SPRN_MAS2 - stw r0,MAS2(r11) - mfspr r0,SPRN_MAS3 - stw r0,MAS3(r11) - mfspr r0,SPRN_MAS6 - stw r0,MAS6(r11) -#ifdef CONFIG_PHYS_64BIT - mfspr r0,SPRN_MAS7 - stw r0,MAS7(r11) -#endif /* CONFIG_PHYS_64BIT */ -#endif /* CONFIG_PPC_BOOK3E_MMU */ -#ifdef CONFIG_44x - mfspr r0,SPRN_MMUCR - stw r0,MMUCR(r11) -#endif - mfspr r0,SPRN_SRR0 - stw r0,_SRR0(r11) - mfspr r0,SPRN_SRR1 - stw r0,_SRR1(r11) - - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,SAVED_KSP_LIMIT(r11) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - -#ifdef CONFIG_40x - .globl crit_transfer_to_handler -crit_transfer_to_handler: - lwz r0,crit_r10@l(0) - stw r0,GPR10(r11) - lwz r0,crit_r11@l(0) - stw r0,GPR11(r11) - mfspr r0,SPRN_SRR0 - stw r0,crit_srr0@l(0) - mfspr r0,SPRN_SRR1 - stw r0,crit_srr1@l(0) - - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,saved_ksp_limit@l(0) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - -/* - * This code finishes saving the registers to the exception frame - * and jumps to the appropriate handler for the exception, turning 
- * on address translation. - * Note that we rely on the caller having set cr0.eq iff the exception - * occurred in kernel mode (i.e. MSR:PR = 0). - */ - .globl transfer_to_handler_full -transfer_to_handler_full: - SAVE_NVGPRS(r11) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) - /* fall through */ - - .globl transfer_to_handler -transfer_to_handler: - stw r2,GPR2(r11) - stw r12,_NIP(r11) - stw r9,_MSR(r11) - andi. r2,r9,MSR_PR - mfctr r12 - mfspr r2,SPRN_XER - stw r12,_CTR(r11) - stw r2,_XER(r11) - mfspr r12,SPRN_SPRG_THREAD - tovirt_vmstack r12, r12 - beq 2f /* if from user, fix up THREAD.regs */ - addi r2, r12, -THREAD - addi r11,r1,STACK_FRAME_OVERHEAD - stw r11,PT_REGS(r12) -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h -#endif - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys(r11,r11) - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwi r9,r9,2 - add r11,r11,r9 -#endif - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 -#endif - - b 3f - -2: /* if from kernel, check interrupted DOZE/NAP mode and - * check for stack overflow - */ - kuap_save_and_lock r11, r12, r9, r2, r6 - addi r2, r12, -THREAD -#ifndef CONFIG_VMAP_STACK - lwz r9,KSP_LIMIT(r12) - cmplw r1,r9 /* if r1 <= ksp_limit */ - ble- stack_ovf /* then the kernel stack overflowed */ -#endif -5: #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) + .globl prepare_transfer_to_handler +prepare_transfer_to_handler: + /* if from kernel, check interrupted DOZE/NAP mode */ lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f bt- 
31-TLF_SLEEPING,7f -#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ - .globl transfer_to_handler_cont -transfer_to_handler_cont: -3: - mflr r9 - tovirt_novmstack r2, r2 /* set r2 to current */ - tovirt_vmstack r9, r9 - lwz r11,0(r9) /* virtual address of handler */ - lwz r9,4(r9) /* where to go when done */ -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * When tracing IRQ state (lockdep) we enable the MMU before we call - * the IRQ tracing functions as they might access vmalloc space or - * perform IOs for console output. - * - * To speed up the syscall path where interrupts stay on, let's check - * first if we are changing the MSR value at all. - */ - tophys_novmstack r12, r1 - lwz r12,_MSR(r12) - andi. r12,r12,MSR_EE - bne 1f - - /* MSR isn't changing, just transition directly */ -#endif - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 - mtlr r9 - rfi /* jump to handler, enable MMU */ -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif + blr -#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING stw r12,TI_LOCAL_FLAGS(r2) b power_save_ppc32_restore @@ -246,97 +67,18 @@ transfer_to_handler_cont: lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ - kuap_restore r11, r2, r3, r4, r5 lwz r2, GPR2(r11) b fast_exception_return -#endif -_ASM_NOKPROBE_SYMBOL(transfer_to_handler) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) - -#ifdef CONFIG_TRACE_IRQFLAGS -1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to - * keep interrupts disabled at this point otherwise we might risk - * taking an interrupt before we tell lockdep they are enabled. - */ - lis r12,reenable_mmu@h - ori r12,r12,reenable_mmu@l - LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r0 - rfi -#ifdef CONFIG_40x - b . 
/* Prevent prefetch past rfi */ -#endif - -reenable_mmu: - /* - * We save a bunch of GPRs, - * r3 can be different from GPR3(r1) at this point, r9 and r11 - * contains the old MSR and handler address respectively, - * r0, r4-r8, r12, CCR, CTR, XER etc... are left - * clobbered as they aren't useful past this point. - */ - - stwu r1,-32(r1) - stw r9,8(r1) - stw r11,12(r1) - stw r3,16(r1) - - /* If we are disabling interrupts (normal case), simply log it with - * lockdep - */ -1: bl trace_hardirqs_off - lwz r3,16(r1) - lwz r11,12(r1) - lwz r9,8(r1) - addi r1,r1,32 - mtctr r11 - mtlr r9 - bctr /* jump to handler */ -#endif /* CONFIG_TRACE_IRQFLAGS */ - -#ifndef CONFIG_VMAP_STACK -/* - * On kernel stack overflow, load up an initial stack pointer - * and call StackOverflow(regs), which should not return. - */ -stack_ovf: - /* sometimes we use a statically-allocated stack, which is OK. */ - lis r12,_end@h - ori r12,r12,_end@l - cmplw r1,r12 - ble 5b /* r1 <= &_end is OK */ - SAVE_NVGPRS(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - lis r9,StackOverflow@ha - addi r9,r9,StackOverflow@l - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0,r9 - mtspr SPRN_SRR1,r10 - rfi -#ifdef CONFIG_40x - b . 
/* Prevent prefetch past rfi */ -#endif -_ASM_NOKPROBE_SYMBOL(stack_ovf) -#endif +_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) +#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ .globl transfer_to_syscall transfer_to_syscall: SAVE_NVGPRS(r1) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif /* Calling convention has r9 = orig r0, r10 = regs */ addi r10,r1,STACK_FRAME_OVERHEAD mr r9,r0 - stw r10,THREAD+PT_REGS(r2) bl system_call_exception ret_from_syscall: @@ -349,10 +91,6 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r5, r7 -#endif - kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -412,27 +150,6 @@ ret_from_kernel_thread: b ret_from_syscall /* - * Top-level page fault handling. - * This is in assembler because if do_page_fault tells us that - * it is a bad kernel page fault, we want to save the non-volatile - * registers before calling bad_page_fault. - */ - .globl handle_page_fault -handle_page_fault: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_page_fault - cmpwi r3,0 - beq+ ret_from_except - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - clrrwi r0,r0,1 - stw r0,_TRAP(r1) - mr r4,r3 /* err arg for bad_page_fault */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except_full - -/* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state * of the other is restored from its kernel stack. The memory @@ -485,7 +202,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ - kuap_check r2, r0 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -529,12 +245,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) fast_exception_return: #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) andi. 
r10,r9,MSR_RI /* check for recoverable interrupt */ - beq 1f /* if not, we've got problems */ + beq 3f /* if not, we've got problems */ #endif 2: REST_4GPRS(3, r11) lwz r10,_CCR(r11) - REST_GPR(1, r11) + REST_2GPRS(1, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 @@ -556,257 +272,147 @@ fast_exception_return: #endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -/* check if the exception happened in a restartable section */ -1: lis r3,exc_exit_restart_end@ha - addi r3,r3,exc_exit_restart_end@l - cmplw r12,r3 - bge 3f - lis r4,exc_exit_restart@ha - addi r4,r4,exc_exit_restart@l - cmplw r12,r4 - blt 3f - lis r3,fee_restarts@ha - tophys(r3,r3) - lwz r5,fee_restarts@l(r3) - addi r5,r5,1 - stw r5,fee_restarts@l(r3) - mr r12,r4 /* restart at exc_exit_restart */ - b 2b - - .section .bss - .align 2 -fee_restarts: - .space 4 - .previous - /* aargh, a nonrecoverable interrupt, panic */ /* aargh, we don't know which trap this is */ 3: li r10,-1 stw r10,_TRAP(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - lis r10,MSR_KERNEL@h - ori r10,r10,MSR_KERNEL@l - bl transfer_to_handler_full - .long unrecoverable_exception - .long ret_from_except -#endif - - .globl ret_from_except_full -ret_from_except_full: - REST_NVGPRS(r1) - /* fall through */ - - .globl ret_from_except -ret_from_except: - /* Hard-disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. */ - /* Note: We don't bother telling lockdep about it */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - - lwz r3,_MSR(r1) /* Returning to user mode? */ - andi. r0,r3,MSR_PR - beq resume_kernel - -user_exc_return: /* r10 contains MSR_KERNEL here */ - /* Check current_thread_info()->flags */ - lwz r9,TI_FLAGS(r2) - andi. 
r0,r9,_TIF_USER_WORK_MASK - bne do_work + prepare_transfer_to_handler + bl unrecoverable_exception + trap /* should not get here */ -restore_user: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* Check whether this process has its own DBCR0 value. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif - ACCOUNT_CPU_USER_EXIT(r2, r10, r11) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r10, r11 -#endif + .globl interrupt_return +interrupt_return: + lwz r4,_MSR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + andi. r0,r4,MSR_PR + beq .Lkernel_interrupt_return + bl interrupt_exit_user_prepare + cmpwi r3,0 + bne- .Lrestore_nvgprs - b restore +.Lfast_user_interrupt_return: + lwz r11,_NIP(r1) + lwz r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 -/* N.B. the only way to get here is from the beq following ret_from_except. */ -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - lwz r8,TI_FLAGS(r2) - andis. r0,r8,_TIF_EMULATE_STACK_STORE@h - beq+ 1f +BEGIN_FTR_SECTION + stwcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + lwarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ + lwz r3,_CCR(r1) + lwz r4,_LINK(r1) + lwz r5,_CTR(r1) + lwz r6,_XER(r1) + li r0,0 - lwz r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ + /* + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. + */ + stw r0,8(r1) + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) - /* Copy from the original to the trampoline. 
*/ - li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ + mtcr r3 + mtlr r4 mtctr r5 -2: lwzx r0,r6,r4 - stwx r0,r6,r3 - addi r6,r6,4 - bdnz 2b - - /* Do real store operation to complete stwu */ - lwz r5,GPR1(r1) - stw r8,0(r5) + mtspr SPRN_XER,r6 - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r2,TI_FLAGS -0: lwarx r8,0,r5 - andc r8,r8,r11 - stwcx. r8,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPTION - /* check current_thread_info->preempt_count */ - lwz r0,TI_PREEMPT(r2) - cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore_kuap - andi. r8,r8,_TIF_NEED_RESCHED - beq+ restore_kuap - lwz r3,_MSR(r1) - andi. r0,r3,MSR_EE /* interrupts off? */ - beq restore_kuap /* don't schedule if so */ -#ifdef CONFIG_TRACE_IRQFLAGS - /* Lockdep thinks irqs are enabled, we need to call - * preempt_schedule_irq with IRQs off, so we inform lockdep - * now that we -did- turn them off already - */ - bl trace_hardirqs_off -#endif - bl preempt_schedule_irq -#ifdef CONFIG_TRACE_IRQFLAGS - /* And now, to properly rebalance the above, we tell lockdep they - * are being turned back on, which will happen when we return - */ - bl trace_hardirqs_on + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ #endif -#endif /* CONFIG_PREEMPTION */ -restore_kuap: - kuap_restore r1, r2, r9, r10, r0 - - /* interrupts are hard-disabled at this point */ -restore: -#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) - lis r4,icache_44x_need_flush@ha - lwz r5,icache_44x_need_flush@l(r4) - cmplwi cr0,r5,0 - beq+ 1f - li r6,0 - iccci r0,r0 - stw r6,icache_44x_need_flush@l(r4) -1: -#endif /* CONFIG_44x */ - lwz r9,_MSR(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* Lockdep doesn't know about the fact that IRQs are temporarily turned - * off in this assembly code while peeking at TI_FLAGS() and such. 
However - * we need to inform it if the exception turned interrupts off, and we - * are about to trun them back on. - */ - andi. r10,r9,MSR_EE - beq 1f - stwu r1,-32(r1) - mflr r0 - stw r0,4(r1) - bl trace_hardirqs_on - addi r1, r1, 32 - lwz r9,_MSR(r1) -1: -#endif /* CONFIG_TRACE_IRQFLAGS */ +.Lrestore_nvgprs: + REST_NVGPRS(r1) + b .Lfast_user_interrupt_return - lwz r0,GPR0(r1) - lwz r2,GPR2(r1) - REST_4GPRS(3, r1) - REST_2GPRS(7, r1) +.Lkernel_interrupt_return: + bl interrupt_exit_kernel_prepare - lwz r10,_XER(r1) - lwz r11,_CTR(r1) - mtspr SPRN_XER,r10 - mtctr r11 +.Lfast_kernel_interrupt_return: + cmpwi cr1,r3,0 + lwz r11,_NIP(r1) + lwz r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 BEGIN_FTR_SECTION - lwarx r11,0,r1 -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) - stwcx. r0,0,r1 /* to clear the reservation */ + stwcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + lwarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) - andi. r10,r9,MSR_RI /* check if this exception occurred */ - beql nonrecoverable /* at a bad place (MSR:RI = 0) */ + lwz r3,_LINK(r1) + lwz r4,_CTR(r1) + lwz r5,_XER(r1) + lwz r6,_CCR(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) - lwz r10,_CCR(r1) - lwz r11,_LINK(r1) - mtcrf 0xFF,r10 - mtlr r11 + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ - li r10, 0 - stw r10, 8(r1) /* - * Once we put values in SRR0 and SRR1, we are in a state - * where exceptions are not recoverable, since taking an - * exception will trash SRR0 and SRR1. Therefore we clear the - * MSR:RI bit to indicate this. If we do take an exception, - * we can't return to the point of the exception but we - * can restart the exception exit path at the label - * exc_exit_restart below. -- paulus + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. 
*/ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) - mtmsr r10 /* clear the RI bit */ - .globl exc_exit_restart -exc_exit_restart: - lwz r12,_NIP(r1) - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r9 - REST_4GPRS(9, r1) - lwz r1,GPR1(r1) - .globl exc_exit_restart_end -exc_exit_restart_end: + stw r0,8(r1) + + REST_4GPRS(2, r1) + + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) rfi -_ASM_NOKPROBE_SYMBOL(exc_exit_restart) -_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif -#else /* !(CONFIG_4xx || CONFIG_BOOKE) */ - /* - * This is a bit different on 4xx/Book-E because it doesn't have - * the RI bit in the MSR. - * The TLB miss handler checks if we have interrupted - * the exception exit path and restarts it if so - * (well maybe one day it will... :). +1: /* + * Emulate stack store with update. New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * SPRG Scratch0 as temporary storage to hold the store + * data, as interrupts are disabled here so it won't be clobbered. 
*/ - lwz r11,_LINK(r1) - mtlr r11 - lwz r10,_CCR(r1) - mtcrf 0xff,r10 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ - li r10, 0 - stw r10, 8(r1) - REST_2GPRS(9, r1) - .globl exc_exit_restart -exc_exit_restart: - lwz r11,_NIP(r1) - lwz r12,_MSR(r1) - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - REST_2GPRS(11, r1) - lwz r1,GPR1(r1) - .globl exc_exit_restart_end -exc_exit_restart_end: + mtcr r6 +#ifdef CONFIG_BOOKE + mtspr SPRN_SPRG_WSCRATCH0, r9 +#else + mtspr SPRN_SPRG_SCRATCH0, r9 +#endif + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + stw r9,0(r1) /* perform store component of stwu */ +#ifdef CONFIG_BOOKE + mfspr r9, SPRN_SPRG_RSCRATCH0 +#else + mfspr r9, SPRN_SPRG_SCRATCH0 +#endif rfi - b . /* prevent prefetch past rfi */ -_ASM_NOKPROBE_SYMBOL(exc_exit_restart) +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif +_ASM_NOKPROBE_SYMBOL(interrupt_return) + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) /* * Returning from a critical interrupt in user mode doesn't need @@ -837,8 +443,7 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart) REST_NVGPRS(r1); \ lwz r3,_MSR(r1); \ andi. 
r3,r3,MSR_PR; \ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \ - bne user_exc_return; \ + bne interrupt_return; \ lwz r0,GPR0(r1); \ lwz r2,GPR2(r1); \ REST_4GPRS(3, r1); \ @@ -906,11 +511,6 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart) #ifdef CONFIG_40x .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG_THREAD - lis r10,saved_ksp_limit@ha; - lwz r10,saved_ksp_limit@l(r10); - tovirt(r9,r9); - stw r10,KSP_LIMIT(r9) lis r9,crit_srr0@ha; lwz r9,crit_srr0@l(r9); lis r10,crit_srr1@ha; @@ -924,9 +524,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) #ifdef CONFIG_BOOKE .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) @@ -934,9 +531,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) .globl ret_from_debug_exc ret_from_debug_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_xSRR(CSRR0,CSRR1); RESTORE_MMU_REGS; @@ -945,9 +539,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_debug_exc) .globl ret_from_mcheck_exc ret_from_mcheck_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_xSRR(CSRR0,CSRR1); RESTORE_xSRR(DSRR0,DSRR1); @@ -955,121 +546,8 @@ ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ - -/* - * Load the DBCR0 value for a task that is being ptraced, - * having first saved away the global DBCR0. Note that r0 - * has the dbcr0 value to set upon entry to this. 
- */ -load_dbcr0: - mfmsr r10 /* first disable debug exceptions */ - rlwinm r10,r10,0,~MSR_DE - mtmsr r10 - isync - mfspr r10,SPRN_DBCR0 - lis r11,global_dbcr0@ha - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwi r9,r9,2 - add r11,r11,r9 -#endif - stw r10,0(r11) - mtspr SPRN_DBCR0,r0 - li r11,-1 - mtspr SPRN_DBSR,r11 /* clear all pending debug events */ - blr - - .section .bss - .align 4 - .global global_dbcr0 -global_dbcr0: - .space 4*NR_CPUS - .previous #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ -do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED - beq do_user_signal - -do_resched: /* r10 contains MSR_KERNEL here */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on - mfmsr r10 -#endif - ori r10,r10,MSR_EE - mtmsr r10 /* hard-enable interrupts */ - bl schedule -recheck: - /* Note: And we don't tell it we are disabling them again - * neither. Those disable/enable cycles used to peek at - * TI_FLAGS aren't advertised. - */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_NEED_RESCHED - bne- do_resched - andi. r0,r9,_TIF_USER_WORK_MASK - beq restore_user -do_user_signal: /* r10 contains MSR_KERNEL here */ - ori r10,r10,MSR_EE - mtmsr r10 /* hard-enable interrupts */ - /* save r13-r31 in the exception frame, if not already done */ - lwz r3,_TRAP(r1) - andi. r0,r3,1 - beq 2f - SAVE_NVGPRS(r1) - rlwinm r3,r3,0,0,30 - stw r3,_TRAP(r1) -2: addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r9 - bl do_notify_resume - REST_NVGPRS(r1) - b recheck - -/* - * We come here when we are at the end of handling an exception - * that occurred at a place where taking an exception will lose - * state information, such as the contents of SRR0 and SRR1. 
- */ -nonrecoverable: - lis r10,exc_exit_restart_end@ha - addi r10,r10,exc_exit_restart_end@l - cmplw r12,r10 - bge 3f - lis r11,exc_exit_restart@ha - addi r11,r11,exc_exit_restart@l - cmplw r12,r11 - blt 3f - lis r10,ee_restarts@ha - lwz r12,ee_restarts@l(r10) - addi r12,r12,1 - stw r12,ee_restarts@l(r10) - mr r12,r11 /* restart at exc_exit_restart */ - blr -3: /* OK, we can't recover, kill this process */ - lwz r3,_TRAP(r1) - andi. r0,r3,1 - beq 5f - SAVE_NVGPRS(r1) - rlwinm r3,r3,0,0,30 - stw r3,_TRAP(r1) -5: mfspr r2,SPRN_SPRG_THREAD - addi r2,r2,-THREAD - tovirt(r2,r2) /* set back r2 to current */ -4: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - /* shouldn't return */ - b 4b -_ASM_NOKPROBE_SYMBOL(nonrecoverable) - - .section .bss - .align 2 -ee_restarts: - .space 4 - .previous - /* * PROM code for specific machines follows. Put it * here so it's easy to add arch-specific sections later. @@ -1088,7 +566,6 @@ _GLOBAL(enter_rtas) lis r6,1f@ha /* physical return address for rtas */ addi r6,r6,1f@l tophys(r6,r6) - tophys_novmstack r7, r1 lwz r8,RTASENTRY(r4) lwz r4,RTASBASE(r4) mfmsr r9 @@ -1097,24 +574,25 @@ _GLOBAL(enter_rtas) mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 - stw r7, THREAD + RTAS_SP(r2) + stw r1, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi -1: tophys_novmstack r9, r1 -#ifdef CONFIG_VMAP_STACK - li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ - mtmsr r0 - isync -#endif - lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ - lwz r9,8(r9) /* original msr value */ - addi r1,r1,INT_FRAME_SIZE - li r0,0 - tophys_novmstack r7, r2 - stw r0, THREAD + RTAS_SP(r7) +1: + lis r8, 1f@h + ori r8, r8, 1f@l + LOAD_REG_IMMEDIATE(r9,MSR_KERNEL) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - rfi /* return to caller */ + rfi /* Reactivate MMU translation */ +1: + lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */ + lwz r9,8(r1) /* original msr value */ + addi 
r1,r1,INT_FRAME_SIZE + li r0,0 + stw r0, THREAD + RTAS_SP(r2) + mtlr r8 + mtmsr r9 + blr /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6c4d9e276c4d..03727308d8cc 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -117,13 +117,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* - * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which - * would clobber syscall parameters. Also we always enter with IRQs - * enabled and nothing pending. system_call_exception() will call - * trace_hardirqs_off(). - * - * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The - * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED. + * scv enters with MSR[EE]=1 and is immediately considered soft-masked. + * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, + * and interrupts may be masked and pending already. + * system_call_exception() will call trace_hardirqs_off() which means + * interrupts could already have been blocked before trace_hardirqs_off, + * but this is the best we can do. */ /* Calling convention has r9 = orig r0, r10 = regs */ @@ -288,9 +287,8 @@ END_BTB_FLUSH_SECTION std r11,-16(r10) /* "regshere" marker */ /* - * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which - * would clobber syscall parameters. Also we always enter with IRQs - * enabled and nothing pending. system_call_exception() will call + * We always enter kernel from userspace with irq soft-mask enabled and + * nothing pending. system_call_exception() will call * trace_hardirqs_off(). */ li r11,IRQS_ALL_DISABLED @@ -417,19 +415,6 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit -#ifdef CONFIG_PPC_BOOK3E -/* Save non-volatile GPRs, if not already saved. */ -_GLOBAL(save_nvgprs) - ld r11,_TRAP(r1) - andi. 
r0,r11,1 - beqlr- - SAVE_NVGPRS(r1) - clrrdi r0,r11,1 - std r0,_TRAP(r1) - blr -_ASM_NOKPROBE_SYMBOL(save_nvgprs); -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #define FLUSH_COUNT_CACHE \ @@ -645,7 +630,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr -#ifdef CONFIG_PPC_BOOK3S /* * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not * touched, no exit work created, then this can be used. @@ -657,6 +641,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR +#ifdef CONFIG_PPC_BOOK3S bne .Lfast_user_interrupt_return_amr kuap_kernel_restore r3, r4 andi. r0,r5,MSR_RI @@ -665,6 +650,10 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception b . /* should not get here */ +#else + bne .Lfast_user_interrupt_return + b .Lfast_kernel_interrupt_return +#endif .balign IFETCH_ALIGN_BYTES .globl interrupt_return @@ -678,8 +667,10 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) cmpdi r3,0 bne- .Lrestore_nvgprs +#ifdef CONFIG_PPC_BOOK3S .Lfast_user_interrupt_return_amr: kuap_user_restore r3, r4 +#endif .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) @@ -788,7 +779,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) RFI_TO_KERNEL b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_RTAS /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e8eb9992a270..7c3654b0d0f4 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -63,9 +63,6 @@ ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) special_reg_save: - lbz r9,PACAIRQHAPPENED(r13) - RECONCILE_IRQ_STATE(r3,r4) - /* * We only need (or have stack space) to save this stuff if * we interrupted the kernel. 
@@ -119,15 +116,11 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS5,r10 mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - SPECIAL_EXC_STORE(r9,IRQHAPPENED) - mfspr r10,SPRN_DEAR SPECIAL_EXC_STORE(r10,DEAR) mfspr r10,SPRN_ESR SPECIAL_EXC_STORE(r10,ESR) - lbz r10,PACAIRQSOFTMASK(r13) - SPECIAL_EXC_STORE(r10,SOFTE) ld r10,_NIP(r1) SPECIAL_EXC_STORE(r10,CSRR0) ld r10,_MSR(r1) @@ -139,7 +132,8 @@ ret_from_level_except: ld r3,_MSR(r1) andi. r3,r3,MSR_PR beq 1f - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return 1: LOAD_REG_ADDR(r11,extlb_level_exc) @@ -193,27 +187,6 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - lbz r6,PACAIRQSOFTMASK(r13) - ld r5,SOFTE(r1) - - /* Interrupts had better not already be enabled... */ - tweqi r6,IRQS_ENABLED - - andi. r6,r5,IRQS_DISABLED - bne 1f - - TRACE_ENABLE_INTS - stb r5,PACAIRQSOFTMASK(r13) -1: - /* - * Restore PACAIRQHAPPENED rather than setting it based on - * the return MSR[EE], since we could have interrupted - * __check_irq_replay() or other inconsistent transitory - * states that must remain that way. 
- */ - SPECIAL_EXC_LOAD(r10,IRQHAPPENED) - stb r10,PACAIRQHAPPENED(r13) - SPECIAL_EXC_LOAD(r10,DEAR) mtspr SPRN_DEAR,r10 SPECIAL_EXC_LOAD(r10,ESR) @@ -417,14 +390,15 @@ exc_##n##_common: \ std r6,_LINK(r1); \ std r7,_CTR(r1); \ std r8,_XER(r1); \ - li r3,(n)+1; /* indicate partial regs in trap */ \ + li r3,(n); /* regs.trap vector */ \ std r9,0(r1); /* store stack frame back link */ \ std r10,_CCR(r1); /* store orig CR in stackframe */ \ std r9,GPR1(r1); /* store stack frame back link */ \ std r11,SOFTE(r1); /* and save it to stackframe */ \ std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ - std r0,RESULT(r1); /* clear regs->result */ + std r0,RESULT(r1); /* clear regs->result */ \ + SAVE_NVGPRS(r1); #define EXCEPTION_COMMON(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) @@ -435,28 +409,6 @@ exc_##n##_common: \ #define EXCEPTION_COMMON_DBG(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG) -/* - * This is meant for exceptions that don't immediately hard-enable. We - * set a bit in paca->irq_happened to ensure that a subsequent call to - * arch_local_irq_restore() will properly hard-enable and avoid the - * fast-path, and then reconcile irq state. - */ -#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4) - -/* - * This is called by exceptions that don't use INTS_DISABLE (that did not - * touch irq indicators in the PACA). 
This will restore MSR:EE to it's - * previous value - * - * XXX In the long run, we may want to open-code it in order to separate the - * load from the wrtee, thus limiting the latency caused by the dependency - * but at this point, I'll favor code clarity until we have a near to final - * implementation - */ -#define INTS_RESTORE_HARD \ - ld r11,_MSR(r1); \ - wrtee r11; - /* XXX FIXME: Restore r14/r15 when necessary */ #define BAD_STACK_TRAMPOLINE(n) \ exc_##n##_bad_stack: \ @@ -505,12 +457,11 @@ exc_##n##_bad_stack: \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum) \ - INTS_DISABLE; \ ack(r8); \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b ret_from_except_lite; + b interrupt_return /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" @@ -561,11 +512,10 @@ __end_interrupts: CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x100) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* Machine Check Interrupt */ @@ -573,7 +523,6 @@ __end_interrupts: MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_MC(0x000) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD @@ -587,7 +536,6 @@ __end_interrupts: mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR EXCEPTION_COMMON(0x300) - INTS_DISABLE b storage_fault_common /* Instruction Storage Interrupt */ @@ -597,7 +545,6 @@ __end_interrupts: li r15,0 mr r14,r10 EXCEPTION_COMMON(0x400) - INTS_DISABLE b storage_fault_common /* External Input Interrupt */ @@ -619,13 +566,12 @@ __end_interrupts: PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR EXCEPTION_COMMON(0x700) - INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) - bl save_nvgprs bl 
program_check_exception - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return /* Floating Point Unavailable Interrupt */ START_EXCEPTION(fp_unavailable); @@ -637,12 +583,10 @@ __end_interrupts: andi. r0,r12,MSR_PR; beq- 1f bl load_up_fpu - b fast_exception_return -1: INTS_DISABLE - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD + b fast_interrupt_return +1: addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception - b ret_from_except + b interrupt_return /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); @@ -656,15 +600,13 @@ BEGIN_FTR_SECTION andi. r0,r12,MSR_PR; beq- 1f bl load_up_altivec - b fast_exception_return + b fast_interrupt_return 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b ret_from_except + b interrupt_return /* AltiVec Assist */ START_EXCEPTION(altivec_assist); @@ -672,17 +614,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) - INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION bl altivec_assist_exception END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + REST_NVGPRS(r1) #else bl unknown_exception #endif - b ret_from_except + b interrupt_return /* Decrementer Interrupt */ @@ -698,14 +639,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x9f0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_BOOKE_WDT bl WatchdogException #else - bl unknown_exception + bl unknown_nmi_exception #endif b ret_from_crit_except @@ -722,11 +662,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) - INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b 
ret_from_except + b interrupt_return /* Debug exception as a critical interrupt*/ START_EXCEPTION(debug_crit); @@ -792,9 +730,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) - bl save_nvgprs bl DebugException - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return kernel_dbg_exc: b . /* NYI */ @@ -859,24 +797,22 @@ kernel_dbg_exc: */ mfspr r14,SPRN_DBSR EXCEPTION_COMMON_DBG(0xd08) - INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) - bl save_nvgprs bl DebugException - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return START_EXCEPTION(perfmon); NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260) - INTS_DISABLE CHECK_NAPPING() addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b ret_from_except_lite + b interrupt_return /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL, @@ -887,11 +823,10 @@ kernel_dbg_exc: CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x2a0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* @@ -903,21 +838,18 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* Guest Doorbell critical Interrupt */ START_EXCEPTION(guest_doorbell_crit); CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x2e0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* Hypervisor call */ @@ -926,10 +858,8 @@ kernel_dbg_exc: 
PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* Embedded Hypervisor priviledged */ START_EXCEPTION(ehpriv); @@ -937,10 +867,8 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* LRAT Error interrupt */ START_EXCEPTION(lrat_error); @@ -948,10 +876,8 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* * An interrupt came in while soft-disabled; We mark paca->irq_happened @@ -1011,14 +937,7 @@ storage_fault_common: ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault - cmpdi r3,0 - bne- 1f - b ret_from_except_lite -1: bl save_nvgprs - mr r4,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except + b interrupt_return /* * Alignment exception doesn't fit entirely in the 0x100 bytes so it @@ -1030,291 +949,9 @@ alignment_more: addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - bl save_nvgprs - INTS_RESTORE_HARD bl alignment_exception - b ret_from_except - - .align 7 -_GLOBAL(ret_from_except) - ld r11,_TRAP(r1) - andi. r0,r11,1 - bne ret_from_except_lite REST_NVGPRS(r1) - -_GLOBAL(ret_from_except_lite) - /* - * Disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. - */ - wrteei 0 - - ld r9, PACA_THREAD_INFO(r13) - ld r3,_MSR(r1) - ld r10,PACACURRENT(r13) - ld r4,TI_FLAGS(r9) - andi. r3,r3,MSR_PR - beq resume_kernel - lwz r3,(THREAD+THREAD_DBCR0)(r10) - - /* Check current_thread_info()->flags */ - andi. 
r0,r4,_TIF_USER_WORK_MASK - bne 1f - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - andis. r0,r3,DBCR0_IDM@h - beq restore - mfmsr r0 - rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ - mtmsr r0 - mtspr SPRN_DBCR0,r3 - li r10, -1 - mtspr SPRN_DBSR,r10 - b restore -1: andi. r0,r4,_TIF_NEED_RESCHED - beq 2f - bl restore_interrupts - SCHEDULE_USER - b ret_from_except_lite -2: - bl save_nvgprs - /* - * Use a non volatile GPR to save and restore our thread_info flags - * across the call to restore_interrupts. - */ - mr r30,r4 - bl restore_interrupts - mr r4,r30 - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_notify_resume - b ret_from_except - -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - andis. r8,r4,_TIF_EMULATE_STACK_STORE@h - beq+ 1f - - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ - - ld r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ - - /* Copy from the original to the trampoline. */ - li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ - mtctr r5 -2: ldx r0,r6,r4 - stdx r0,r6,r3 - addi r6,r6,8 - bdnz 2b - - /* Do real store operation to complete stdu */ - ld r5,GPR1(r1) - std r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r9,TI_FLAGS -0: ldarx r4,0,r5 - andc r4,r4,r11 - stdcx. r4,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPT - /* Check if we need to preempt */ - andi. r0,r4,_TIF_NEED_RESCHED - beq+ restore - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr0,r8,0 - bne restore - ld r0,SOFTE(r1) - andi. r0,r0,IRQS_DISABLED - bne restore - - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first and reconcile irq state. 
- */ - RECONCILE_IRQ_STATE(r3,r4) - bl preempt_schedule_irq - - /* - * arch_local_irq_restore() from preempt_schedule_irq above may - * enable hard interrupt but we really should disable interrupts - * when we return from the interrupt, and so that we don't get - * interrupted after loading SRR0/1. - */ - wrteei 0 -#endif /* CONFIG_PREEMPT */ - -restore: - /* - * This is the main kernel exit path. First we check if we - * are about to re-enable interrupts - */ - ld r5,SOFTE(r1) - lbz r6,PACAIRQSOFTMASK(r13) - andi. r5,r5,IRQS_DISABLED - bne .Lrestore_irq_off - - /* We are enabling, were we already enabled ? Yes, just return */ - andi. r6,r6,IRQS_DISABLED - beq cr0,fast_exception_return - - /* - * We are about to soft-enable interrupts (we are hard disabled - * at this point). We check if there's anything that needs to - * be replayed first. - */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- .Lrestore_check_irq_replay - - /* - * Get here when nothing happened while soft-disabled, just - * soft-enable and move-on. We will hard-enable as a side - * effect of rfi - */ -.Lrestore_no_replay: - TRACE_ENABLE_INTS - li r0,IRQS_ENABLED - stb r0,PACAIRQSOFTMASK(r13); - -/* This is the return from load_up_fpu fast path which could do with - * less GPR restores in fact, but for now we have a single return path - */ -fast_exception_return: - wrteei 0 -1: mr r0,r13 - ld r10,_MSR(r1) - REST_4GPRS(2, r1) - andi. r6,r10,MSR_PR - REST_2GPRS(6, r1) - beq 1f - ACCOUNT_CPU_USER_EXIT(r13, r10, r11) - ld r0,GPR13(r1) - -1: stdcx. 
r0,0,r1 /* to clear the reservation */ - - ld r8,_CCR(r1) - ld r9,_LINK(r1) - ld r10,_CTR(r1) - ld r11,_XER(r1) - mtcr r8 - mtlr r9 - mtctr r10 - mtxer r11 - REST_2GPRS(8, r1) - ld r10,GPR10(r1) - ld r11,GPR11(r1) - ld r12,GPR12(r1) - mtspr SPRN_SPRG_GEN_SCRATCH,r0 - - std r10,PACA_EXGEN+EX_R10(r13); - std r11,PACA_EXGEN+EX_R11(r13); - ld r10,_NIP(r1) - ld r11,_MSR(r1) - ld r0,GPR0(r1) - ld r1,GPR1(r1) - mtspr SPRN_SRR0,r10 - mtspr SPRN_SRR1,r11 - ld r10,PACA_EXGEN+EX_R10(r13) - ld r11,PACA_EXGEN+EX_R11(r13) - mfspr r13,SPRN_SPRG_GEN_SCRATCH - rfi - - /* - * We are returning to a context with interrupts soft disabled. - * - * However, we may also about to hard enable, so we need to - * make sure that in this case, we also clear PACA_IRQ_HARD_DIS - * or that bit can get out of sync and bad things will happen - */ -.Lrestore_irq_off: - ld r3,_MSR(r1) - lbz r7,PACAIRQHAPPENED(r13) - andi. r0,r3,MSR_EE - beq 1f - rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS - stb r7,PACAIRQHAPPENED(r13) -1: -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - /* The interrupt should not have soft enabled. */ - lbz r7,PACAIRQSOFTMASK(r13) -1: tdeqi r7,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - b fast_exception_return - - /* - * Something did happen, check if a re-emit is needed - * (this also clears paca->irq_happened) - */ -.Lrestore_check_irq_replay: - /* XXX: We could implement a fast path here where we check - * for irq_happened being just 0x01, in which case we can - * clear it and return. That means that we would potentially - * miss a decrementer having wrapped all the way around. - * - * Still, this might be useful for things like hash_page - */ - bl __check_irq_replay - cmpwi cr0,r3,0 - beq .Lrestore_no_replay - - /* - * We need to re-emit an interrupt. We do so by re-using our - * existing exception frame. 
We first change the trap value, - * but we need to ensure we preserve the low nibble of it - */ - ld r4,_TRAP(r1) - clrldi r4,r4,60 - or r4,r4,r3 - std r4,_TRAP(r1) - - /* - * PACA_IRQ_HARD_DIS won't always be set here, so set it now - * to reconcile the IRQ state. Tracing is already accounted for. - */ - lbz r4,PACAIRQHAPPENED(r13) - ori r4,r4,PACA_IRQ_HARD_DIS - stb r4,PACAIRQHAPPENED(r13) - - /* - * Then find the right handler and call it. Interrupts are - * still soft-disabled and we keep them that way. - */ - cmpwi cr0,r3,0x500 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl do_IRQ - b ret_from_except -1: cmpwi cr0,r3,0x900 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl timer_interrupt - b ret_from_except -#ifdef CONFIG_PPC_DOORBELL -1: - cmpwi cr0,r3,0x280 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl doorbell_exception -#endif /* CONFIG_PPC_DOORBELL */ -1: b ret_from_except /* What else to do here ? */ - -_ASM_NOKPROBE_SYMBOL(ret_from_except); -_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); -_ASM_NOKPROBE_SYMBOL(resume_kernel); -_ASM_NOKPROBE_SYMBOL(restore); -_ASM_NOKPROBE_SYMBOL(fast_exception_return); + b interrupt_return /* * Trampolines used when spotting a bad kernel stack pointer in diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8082b690e874..fa8e52a0239e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -693,25 +693,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endm /* - * When the idle code in power4_idle puts the CPU into NAP mode, - * it has to do so in a loop, and relies on the external interrupt - * and decrementer interrupt entry code to get it out of the loop. - * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags - * to signal that it is in the loop and needs help to get out. - */ -#ifdef CONFIG_PPC_970_NAP -#define FINISH_NAP \ -BEGIN_FTR_SECTION \ - ld r11, PACA_THREAD_INFO(r13); \ - ld r9,TI_LOCAL_FLAGS(r11); \ - andi. 
r10,r9,_TLF_NAPPING; \ - bnel power4_fixup_nap; \ -END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) -#else -#define FINISH_NAP -#endif - -/* * There are a few constraints to be concerned with. * - Real mode exceptions code/data must be located at their physical location. * - Virtual mode exceptions must be mapped at their 0xc000... location. @@ -1248,7 +1229,6 @@ EXC_COMMON_BEGIN(machine_check_common) */ GEN_COMMON machine_check - FINISH_NAP /* Enable MSR_RI when finished with PACA_EXMC */ li r10,MSR_RI mtmsrd r10,1 @@ -1571,7 +1551,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -1801,7 +1780,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt b interrupt_return @@ -1886,7 +1864,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2237,7 +2214,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception b interrupt_return @@ -2266,7 +2242,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2299,7 +2274,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b 
interrupt_return @@ -2345,7 +2319,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception b interrupt_return @@ -2530,8 +2503,6 @@ EXC_VIRT_NONE(0x5100, 0x100) INT_DEFINE_BEGIN(cbe_system_error) IVEC=0x1200 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_system_error) EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) @@ -2551,11 +2522,16 @@ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) #endif - +/** + * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt. + * This has been removed from the ISA before 2.01, which is the earliest + * 64-bit BookS ISA supported, however the G5 / 970 implements this + * interrupt with a non-architected feature available through the support + * processor interface. + */ INT_DEFINE_BEGIN(instruction_breakpoint) IVEC=0x1300 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - IKVM_SKIP=1 IKVM_REAL=1 #endif INT_DEFINE_END(instruction_breakpoint) @@ -2701,8 +2677,6 @@ EXC_COMMON_BEGIN(denorm_exception_common) INT_DEFINE_BEGIN(cbe_maintenance) IVEC=0x1600 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_maintenance) EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) @@ -2754,8 +2728,6 @@ EXC_COMMON_BEGIN(altivec_assist_common) INT_DEFINE_BEGIN(cbe_thermal) IVEC=0x1800 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_thermal) EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) @@ -3096,24 +3068,6 @@ USE_FIXED_SECTION(virt_trampolines) __end_interrupts: DEFINE_FIXED_SYMBOL(__end_interrupts) -#ifdef CONFIG_PPC_970_NAP - /* - * Called by exception entry code if _TLF_NAPPING was set, this clears - * the NAPPING flag, and redirects the exception exit to - * power4_fixup_nap_return. 
- */ - .globl power4_fixup_nap -EXC_COMMON_BEGIN(power4_fixup_nap) - andc r9,r9,r10 - std r9,TI_LOCAL_FLAGS(r11) - LOAD_REG_ADDR(r10, power4_idle_nap_return) - std r10,_NIP(r1) - blr - -power4_idle_nap_return: - blr -#endif - CLOSE_FIXED_SECTION(real_vectors); CLOSE_FIXED_SECTION(real_trampolines); CLOSE_FIXED_SECTION(virt_vectors); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8482739d42f3..b990075285f5 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -31,6 +31,7 @@ #include <asm/fadump.h> #include <asm/fadump-internal.h> #include <asm/setup.h> +#include <asm/interrupt.h> /* * The CPU who acquired the lock to trigger the fadump crash should @@ -44,22 +45,21 @@ static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); -struct kobject *fadump_kobj; - #ifndef CONFIG_PRESERVE_FA_DUMP +static struct kobject *fadump_kobj; + static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; +static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; -struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs, - RESERVED_RNGS_SZ, 0, - RESERVED_RNGS_CNT, true }; +static struct fadump_mrange_info +reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true }; static void __init early_init_dt_scan_reserved_ranges(unsigned long node); @@ -79,7 +79,7 @@ static struct cma *fadump_cma; * But for some reason even if it fails we still have the memory reservation * with us and we can still continue doing fadump. 
*/ -int __init fadump_cma_init(void) +static int __init fadump_cma_init(void) { unsigned long long base, size; int rc; @@ -292,7 +292,7 @@ static void fadump_show_config(void) * that is required for a kernel to boot successfully. * */ -static inline u64 fadump_calculate_reserve_size(void) +static __init u64 fadump_calculate_reserve_size(void) { u64 base, size, bootmem_min; int ret; @@ -728,7 +728,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * If we came in via system reset, wait a while for the secondary * CPUs to enter. */ - if (TRAP(&(fdh->regs)) == 0x100) { + if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) { msecs = CRASH_TIMEOUT; while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) mdelay(1); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 3ff9a8fafa46..2c57ece6671c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -92,9 +92,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* enable use of FP after return */ #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 5d4706c14572..a8221ddcbd66 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -10,36 +10,39 @@ * We assume sprg3 has the physical address of the current * task's thread_struct. 
*/ -.macro EXCEPTION_PROLOG handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG trapno name handle_dar_dsisr=0 EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr + EXCEPTION_PROLOG_2 \trapno \name handle_dar_dsisr=\handle_dar_dsisr .endm .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 -#ifdef CONFIG_VMAP_STACK mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr +#ifdef CONFIG_40x + mfspr r11, SPRN_DEAR +#else mfspr r11, SPRN_DAR +#endif stw r11, DAR(r10) +#ifdef CONFIG_40x + mfspr r11, SPRN_ESR +#else mfspr r11, SPRN_DSISR +#endif stw r11, DSISR(r10) .endif mfspr r11, SPRN_SRR0 stw r11, SRR0(r10) -#endif mfspr r11, SPRN_SRR1 /* check whether user or kernel */ -#ifdef CONFIG_VMAP_STACK stw r11, SRR1(r10) -#endif mfcr r10 andi. r11, r11, MSR_PR .endm -.macro EXCEPTION_PROLOG_1 for_rtas=0 -#ifdef CONFIG_VMAP_STACK +.macro EXCEPTION_PROLOG_1 mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f @@ -47,32 +50,33 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: +#ifdef CONFIG_VMAP_STACK mtcrf 0x3f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow -#else - subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ - beq 1f - mfspr r11,SPRN_SPRG_THREAD - lwz r11,TASK_STACK-THREAD(r11) - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -1: tophys(r11, r11) + bt 32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow #endif .endm -.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 -#ifdef CONFIG_VMAP_STACK - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r11 - isync +.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0 +#ifdef CONFIG_PPC_8xx + .if \handle_dar_dsisr + li r11, RPN_PATTERN + mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ + .endif +#endif + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */ + mtspr SPRN_SRR1, r11 + lis r11, 1f@h + ori r11, r11, 1f@l + 
mtspr SPRN_SRR0, r11 mfspr r11, SPRN_SPRG_SCRATCH2 + rfi + + .text +\name\()_virt: +1: stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 -#else - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1, r11) /* set new kernel sp */ -#endif stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) @@ -82,7 +86,6 @@ stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) -#ifdef CONFIG_VMAP_STACK mfspr r12, SPRN_SPRG_THREAD tovirt(r12, r12) .if \handle_dar_dsisr @@ -93,26 +96,48 @@ .endif lwz r9, SRR1(r12) lwz r12, SRR0(r12) -#else - mfspr r12,SPRN_SRR0 - mfspr r9,SPRN_SRR1 -#endif #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#elif defined(CONFIG_PPC_8xx) + mtspr SPRN_EID, r2 /* Set MSR_RI */ #else -#ifdef CONFIG_VMAP_STACK - li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ -#else - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ -#endif + li r10, MSR_KERNEL /* can take exceptions */ mtmsr r10 /* (except for mach check in rtas) */ #endif - stw r0,GPR0(r11) + COMMON_EXCEPTION_PROLOG_END \trapno +_ASM_NOKPROBE_SYMBOL(\name\()_virt) +.endm + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) + stw r10,8(r1) + li r10, \trapno + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) + SAVE_NVGPRS(r1) + stw r2,GPR2(r1) + stw r12,_NIP(r1) + stw r9,_MSR(r1) + mfctr r10 + mfspr r2,SPRN_SPRG_THREAD + stw r10,_CTR(r1) + tovirt(r2, r2) + mfspr r10,SPRN_XER + addi r2, r2, -THREAD + stw r10,_XER(r1) + addi r3,r1,STACK_FRAME_OVERHEAD +.endm + +.macro prepare_transfer_to_handler +#ifdef CONFIG_PPC_BOOK3S_32 + andi. 
r12,r9,MSR_PR + bne 777f + bl prepare_transfer_to_handler +777: +#endif .endm .macro SYSCALL_ENTRY trapno @@ -156,54 +181,6 @@ b transfer_to_syscall /* jump to handler */ .endm -.macro save_dar_dsisr_on_stack reg1, reg2, sp -#ifndef CONFIG_VMAP_STACK - mfspr \reg1, SPRN_DAR - mfspr \reg2, SPRN_DSISR - stw \reg1, _DAR(\sp) - stw \reg2, _DSISR(\sp) -#endif -.endm - -.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp -#ifdef CONFIG_VMAP_STACK - lwz \reg1, _DAR(\sp) - lwz \reg2, _DSISR(\sp) -#else - save_dar_dsisr_on_stack \reg1, \reg2, \sp -#endif -.endm - -.macro tovirt_vmstack dst, src -#ifdef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tovirt_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tophys_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tophys(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - /* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). @@ -217,41 +194,29 @@ */ #ifdef CONFIG_PPC_BOOK3S #define START_EXCEPTION(n, label) \ + __HEAD; \ . = n; \ DO_KVM n; \ label: #else #define START_EXCEPTION(n, label) \ + __HEAD; \ . 
= n; \ label: #endif -#define EXCEPTION(n, label, hdlr, xfer) \ +#define EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label) \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - LOAD_REG_IMMEDIATE(r10, msr); \ - bl tfer; \ - .long hdlr; \ - .long ret - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ - ret_from_except) + EXCEPTION_PROLOG n label; \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b interrupt_return .macro vmap_stack_overflow_exception -#ifdef CONFIG_VMAP_STACK + __HEAD +vmap_stack_overflow: #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) @@ -261,16 +226,11 @@ label: lis r1, emergency_ctx@ha #endif lwz r1, emergency_ctx@l(r1) - cmpwi cr1, r1, 0 - bne cr1, 1f - lis r1, init_thread_union@ha - addi r1, r1, init_thread_union@l -1: addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - EXCEPTION_PROLOG_2 - SAVE_NVGPRS(r11) - addi r3, r1, STACK_FRAME_OVERHEAD - EXC_XFER_STD(0, stack_overflow_exception) -#endif + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE + EXCEPTION_PROLOG_2 0 vmap_stack_overflow + prepare_transfer_to_handler + bl stack_overflow_exception + b interrupt_return .endm #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 24724a7dad49..e1360b88b6cb 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -89,7 +89,11 @@ _ENTRY(crit_srr0) .space 4 _ENTRY(crit_srr1) .space 4 -_ENTRY(saved_ksp_limit) +_ENTRY(crit_r1) + .space 4 +_ENTRY(crit_dear) + .space 4 +_ENTRY(crit_esr) .space 4 /* @@ -100,42 +104,62 @@ _ENTRY(saved_ksp_limit) * Instead we use a couple of words of memory at low physical addresses. 
* This is OK since we don't support SMP on these processors. */ -#define CRITICAL_EXCEPTION_PROLOG \ - stw r10,crit_r10@l(0); /* save two registers to work with */\ - stw r11,crit_r11@l(0); \ - mfcr r10; /* save CR in r10 for now */\ - mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ - lis r11,critirq_ctx@ha; \ - tophys(r11,r11); \ - lwz r11,critirq_ctx@l(r11); \ - beq 1f; \ - /* COMING FROM USER MODE */ \ - mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ - lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\ -1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\ - tophys(r11,r11); \ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ - stw r12,_DEAR(r11); /* since they may have had stuff */\ - mfspr r9,SPRN_ESR; /* in them at the point where the */\ - stw r9,_ESR(r11); /* exception was taken */\ - mfspr r12,SPRN_SRR2; \ - stw r1,GPR1(r11); \ - mfspr r9,SPRN_SRR3; \ - stw r1,0(r11); \ - tovirt(r1,r11); \ - rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) +.macro CRITICAL_EXCEPTION_PROLOG trapno name + stw r10,crit_r10@l(0) /* save two registers to work with */ + stw r11,crit_r11@l(0) + mfspr r10,SPRN_SRR0 + mfspr r11,SPRN_SRR1 + stw r10,crit_srr0@l(0) + stw r11,crit_srr1@l(0) + mfspr r10,SPRN_DEAR + mfspr r11,SPRN_ESR + stw r10,crit_dear@l(0) + stw r11,crit_esr@l(0) + mfcr r10 /* save CR in r10 for now */ + mfspr r11,SPRN_SRR3 /* check whether user or kernel */ + andi. 
r11,r11,MSR_PR + lis r11,(critirq_ctx-PAGE_OFFSET)@ha + lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11) + beq 1f + /* COMING FROM USER MODE */ + mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ + lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ +1: stw r1,crit_r1@l(0) + addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */ + mtspr SPRN_SRR1, r11 + lis r11, 1f@h + ori r11, r11, 1f@l + mtspr SPRN_SRR0, r11 + rfi + + .text +1: +\name\()_virt: + lwz r11,crit_r1@l(0) + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11,r1 + stw r10,_CCR(r11) /* save various registers */ + stw r12,GPR12(r11) + stw r9,GPR9(r11) + mflr r10 + stw r10,_LINK(r11) + lis r9,PAGE_OFFSET@ha + lwz r10,crit_r10@l(r9) + lwz r12,crit_r11@l(r9) + stw r10,GPR10(r11) + stw r12,GPR11(r11) + lwz r12,crit_dear@l(r9) + lwz r9,crit_esr@l(r9) + stw r12,_DEAR(r11) /* since they may have had stuff */ + stw r9,_ESR(r11) /* exception was taken */ + mfspr r12,SPRN_SRR2 + mfspr r9,SPRN_SRR3 + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ + COMMON_EXCEPTION_PROLOG_END \trapno + 2 +_ASM_NOKPROBE_SYMBOL(\name\()_virt) +.endm /* * State at this point: @@ -155,10 +179,10 @@ _ENTRY(saved_ksp_limit) */ #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + CRITICAL_EXCEPTION_PROLOG n label; \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_crit_exc /* * 0x0100 - Critical Interrupt Exception @@ -178,69 +202,67 @@ _ENTRY(saved_ksp_limit) * if they can't resolve the lightweight TLB fault. 
*/ START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG - mfspr r5, SPRN_ESR /* Grab the ESR, save it */ - stw r5, _ESR(r11) - mfspr r4, SPRN_DEAR /* Grab the DEAR, save it */ - stw r4, _DEAR(r11) - EXC_XFER_LITE(0x300, handle_page_fault) + EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* * 0x0400 - Instruction Storage Exception * This is caused by a fetch from non-execute or guarded pages. */ START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG + EXCEPTION_PROLOG 0x400 InstructionAccess li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ - EXC_XFER_LITE(0x400, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* 0x0500 - External Interrupt Exception */ - EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG - mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ - stw r4,_DEAR(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x600, alignment_exception) + EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG - mfspr r4,SPRN_ESR /* Grab the ESR and save it */ - stw r4,_ESR(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x700, program_check_exception) + EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return - EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0800, Trap_08, 
unknown_exception) + EXCEPTION(0x0900, Trap_09, unknown_exception) + EXCEPTION(0x0A00, Trap_0A, unknown_exception) + EXCEPTION(0x0B00, Trap_0B, unknown_exception) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) SYSCALL_ENTRY 0xc00 /* Trap_0D is commented out to get more space for system call exception */ -/* EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */ - EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) +/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */ + EXCEPTION(0x0E00, Trap_0E, unknown_exception) + EXCEPTION(0x0F00, Trap_0F, unknown_exception) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ - . = 0x1000 + START_EXCEPTION(0x1000, DecrementerTrap) b Decrementer -/* 0x1010 - Fixed Interval Timer (FIT) Exception -*/ - . = 0x1010 +/* 0x1010 - Fixed Interval Timer (FIT) Exception */ + START_EXCEPTION(0x1010, FITExceptionTrap) b FITException -/* 0x1020 - Watchdog Timer (WDT) Exception -*/ - . = 0x1020 +/* 0x1020 - Watchdog Timer (WDT) Exception */ + START_EXCEPTION(0x1020, WDTExceptionTrap) b WDTException /* 0x1100 - Data TLB Miss Exception @@ -249,13 +271,13 @@ _ENTRY(saved_ksp_limit) * load TLB entries from the page table if they exist. */ START_EXCEPTION(0x1100, DTLBMiss) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH6, r11 mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r12 mfspr r9, SPRN_PID - mtspr SPRN_SPRG_SCRATCH5, r9 + rlwimi r12, r9, 0, 0xff mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -316,13 +338,12 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. 
*/ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 b DataStorage /* 0x1200 - Instruction TLB Miss Exception @@ -330,13 +351,13 @@ _ENTRY(saved_ksp_limit) * registers and bailout to a different point. */ START_EXCEPTION(0x1200, ITLBMiss) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH6, r11 mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r12 mfspr r9, SPRN_PID - mtspr SPRN_SPRG_SCRATCH5, r9 + rlwimi r12, r9, 0, 0xff mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -397,28 +418,27 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. 
*/ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 b InstructionAccess - EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1300, Trap_13, unknown_exception) + EXCEPTION(0x1400, Trap_14, unknown_exception) + EXCEPTION(0x1500, Trap_15, unknown_exception) + EXCEPTION(0x1600, Trap_16, unknown_exception) + EXCEPTION(0x1700, Trap_17, unknown_exception) + EXCEPTION(0x1800, Trap_18, unknown_exception) + EXCEPTION(0x1900, Trap_19, unknown_exception) + EXCEPTION(0x1A00, Trap_1A, unknown_exception) + EXCEPTION(0x1B00, Trap_1B, unknown_exception) + EXCEPTION(0x1C00, Trap_1C, unknown_exception) + EXCEPTION(0x1D00, Trap_1D, unknown_exception) + EXCEPTION(0x1E00, Trap_1E, unknown_exception) + EXCEPTION(0x1F00, Trap_1F, unknown_exception) /* Check for a single step debug exception while in an exception * handler before state has been saved. 
This is to catch the case @@ -435,7 +455,7 @@ _ENTRY(saved_ksp_limit) */ /* 0x2000 - Debug Exception */ START_EXCEPTION(0x2000, DebugTrap) - CRITICAL_EXCEPTION_PROLOG + CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap /* * If this is a single step or branch-taken exception in an @@ -477,32 +497,35 @@ _ENTRY(saved_ksp_limit) /* continue normal handling for a critical exception... */ 2: mfspr r4,SPRN_DBSR stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_TEMPLATE(DebugException, 0x2002, \ - (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + prepare_transfer_to_handler + bl DebugException + b ret_from_crit_exc /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ + __HEAD Decrementer: - EXCEPTION_PROLOG + EXCEPTION_PROLOG 0x1000 Decrementer lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x1000, timer_interrupt) + prepare_transfer_to_handler + bl timer_interrupt + b interrupt_return /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ + __HEAD FITException: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD; - EXC_XFER_STD(0x1010, unknown_exception) + EXCEPTION_PROLOG 0x1010 FITException + prepare_transfer_to_handler + bl unknown_exception + b interrupt_return /* Watchdog Timer (WDT) Exception. (from 0x1020) */ + __HEAD WDTException: - CRITICAL_EXCEPTION_PROLOG; - addi r3,r1,STACK_FRAME_OVERHEAD; - EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, - (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), - crit_transfer_to_handler, ret_from_crit_exc) + CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException + prepare_transfer_to_handler + bl WatchdogException + b ret_from_crit_exc /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. @@ -510,6 +533,7 @@ WDTException: * reserved. 
*/ + __HEAD /* Damn, I came up one instruction too many to fit into the * exception space :-). Both the instruction and data TLB * miss get to this point to load the TLB. @@ -543,13 +567,12 @@ finish_tlb_load: /* Done...restore registers and get out of here. */ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 rfi /* Should sync shadow TLBs */ b . /* prevent prefetch past rfi */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 813fa305c33b..5c106ac36626 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -263,8 +263,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \ - do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -277,7 +276,7 @@ interrupt_base: FP_UNAVAILABLE_EXCEPTION #else EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ - FloatingPointUnavailable, unknown_exception, EXC_XFER_STD) + FloatingPointUnavailable, unknown_exception) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -285,15 +284,14 @@ interrupt_base: /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ - AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD) + AuxillaryProcessorUnavailable, unknown_exception) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog 
support */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 46dff3f9c31f..7d445e4342c0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,6 +29,13 @@ #include <asm/ptrace.h> #include <asm/export.h> #include <asm/code-patching-asm.h> +#include <asm/interrupt.h> + +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN 0x00f0 #include "head_32.h" @@ -42,12 +49,6 @@ #endif .endm -/* - * Value for the bits that have fixed value in RPN entries. - * Also used for tagging DAR for DTLBerror. - */ -#define RPN_PATTERN 0x00f0 - #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -118,56 +119,54 @@ instruction_counter: #endif /* System reset */ - EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception) /* Machine check */ - . = 0x200 -MachineCheck: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x200, machine_check_exception) + START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck) + EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1 + prepare_transfer_to_handler + bl machine_check_exception + b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ) /* Alignment exception */ - . 
= 0x600 -Alignment: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ - addi r3,r1,STACK_FRAME_OVERHEAD - b .Lalignment_exception_ool + START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment) + EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1 + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck) + EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - - /* With VMAP_STACK there's not enough room for this at 0x600 */ - . = 0xa00 -.Lalignment_exception_ool: - EXC_XFER_STD(0x600, alignment_exception) + EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt) /* System call */ - . = 0xc00 -SystemCall: - SYSCALL_ENTRY 0xc00 + START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall) + SYSCALL_ENTRY INTERRUPT_SYSCALL /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) + START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu) + EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu + prepare_transfer_to_handler + bl emulation_assist_interrupt + REST_NVGPRS(r1) + b interrupt_return - . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction * TLB. 
The task switch loads the M_TWB register with the pointer to the first @@ -189,7 +188,7 @@ SystemCall: #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif -InstructionTLBMiss: + START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 @@ -245,8 +244,7 @@ InstructionTLBMiss: rfi #endif - . = 0x1200 -DataStoreTLBMiss: + START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -309,83 +307,74 @@ DataStoreTLBMiss: * to many reasons, such as executing guarded memory or illegal instruction * addresses. There is nothing to do but handle a big time error fault. */ - . = 0x1300 -InstructionTLBError: - EXCEPTION_PROLOG + START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError) + /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ + EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie tlbie r12 - /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: stw r12, _DAR(r11) stw r5, _DSISR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to * a higher level function that can handle it. */ - . = 0x1400 -DataTLBError: + START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_DAR cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. 
*/ DARFixed:/* Return from dcbx instruction bug workaround */ -#ifdef CONFIG_VMAP_STACK - li r11, RPN_PATTERN - mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ -#endif EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 + /* 0x300 is DataAccess exception, needed by bad_page_fault() */ + EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1 + lwz r4, _DAR(r11) + lwz r5, _DSISR(r11) andis. r10,r5,DSISR_NOHPTE@h beq+ .Ldtlbie tlbie r4 .Ldtlbie: -#ifndef CONFIG_VMAP_STACK - li r10,RPN_PATTERN - mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ -#endif - /* 0x300 is DataAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x300, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return -stack_overflow: +#ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception +#endif /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to * using them. */ -do_databreakpoint: - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD - mfspr r4,SPRN_BAR - stw r4,_DAR(r11) -#ifndef CONFIG_VMAP_STACK - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) -#endif - EXC_XFER_STD(0x1c00, do_break) - - . = 0x1c00 -DataBreakpoint: + START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_SRR0 cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq - bne cr1, do_databreakpoint + bne cr1, 1f mtcr r10 mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 rfi +1: EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1 + mfspr r4,SPRN_BAR + stw r4,_DAR(r11) + prepare_transfer_to_handler + bl do_break + REST_NVGPRS(r1) + b interrupt_return + #ifdef CONFIG_PERF_EVENTS - . 
= 0x1d00 -InstructionBreakpoint: + START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint) mtspr SPRN_SPRG_SCRATCH0, r10 lwz r10, (instruction_counter - PAGE_OFFSET)@l(0) addi r10, r10, -1 @@ -396,11 +385,12 @@ InstructionBreakpoint: mfspr r10, SPRN_SPRG_SCRATCH0 rfi #else - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception) #endif - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception) + EXCEPTION(0x1f00, Trap_1f, unknown_exception) + __HEAD . = 0x2000 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions @@ -510,14 +500,10 @@ FixupDAR:/* Entry point for dcbx workaround. */ 152: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ -#ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) mfspr r10, SPRN_DSISR stw r10, DSISR(r11) -#else - mtdar r10 /* save fault EA to DAR */ -#endif mfspr r10,SPRN_M_TW b DARFixed /* Go back to normal TLB handling */ @@ -819,7 +805,7 @@ EXPORT_SYMBOL(empty_zero_page) swapper_pg_dir: .space PGD_TABLE_SIZE -/* Room for two PTE table poiners, usually the kernel and current user +/* Room for two PTE table pointers, usually the kernel and current user * pointer to their respective root page table (pgdir). 
*/ .globl abatron_pteptrs diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 565e84e20a72..065178f19a3d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -31,6 +31,7 @@ #include <asm/kvm_book3s_asm.h> #include <asm/export.h> #include <asm/feature-fixups.h> +#include <asm/interrupt.h> #include "head_32.h" @@ -239,7 +240,7 @@ __secondary_hold_acknowledge: /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and putting it back to what it was (unknown_async_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception) /* Machine check */ /* @@ -255,40 +256,28 @@ __secondary_hold_acknowledge: * pointer when we take an exception from supervisor mode.) * -- paulus. */ - . = 0x200 - DO_KVM 0x200 -MachineCheck: + START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck) EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK mtspr SPRN_SPRG_SCRATCH2,r1 mfspr r1, SPRN_SPRG_THREAD lwz r1, RTAS_SP(r1) cmpwi cr1, r1, 0 bne cr1, 7f mfspr r1, SPRN_SPRG_SCRATCH2 -#else - mfspr r11, SPRN_SPRG_THREAD - lwz r11, RTAS_SP(r11) - cmpwi cr1, r11, 0 - bne cr1, 7f -#endif #endif /* CONFIG_PPC_CHRP */ - EXCEPTION_PROLOG_1 for_rtas=1 -7: EXCEPTION_PROLOG_2 - addi r3,r1,STACK_FRAME_OVERHEAD + EXCEPTION_PROLOG_1 +7: EXCEPTION_PROLOG_2 0x200 MachineCheck #ifdef CONFIG_PPC_CHRP - beq cr1, machine_check_tramp + beq cr1, 1f twi 31, 0, 0 -#else - b machine_check_tramp #endif +1: prepare_transfer_to_handler + bl machine_check_exception + b interrupt_return /* Data access exception. */ - . 
= 0x300 - DO_KVM 0x300 -DataAccess: -#ifdef CONFIG_VMAP_STACK + START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess) #ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 @@ -309,30 +298,20 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 - b handle_page_fault_tramp_1 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h - bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - b handle_page_fault_tramp_1 -MMU_FTR_SECTION_ELSE -#endif - b handle_page_fault_tramp_2 -#ifdef CONFIG_PPC_BOOK3S_604 -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1 + prepare_transfer_to_handler + lwz r5, _DSISR(r11) + andis. r0, r5, DSISR_DABRMATCH@h + bne- 1f + bl do_page_fault + b interrupt_return +1: bl do_break + REST_NVGPRS(r1) + b interrupt_return + /* Instruction access exception. */ - . = 0x400 - DO_KVM 0x400 -InstructionAccess: -#ifdef CONFIG_VMAP_STACK + START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess) mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD @@ -352,43 +331,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG - andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? 
*/ - beq 1f /* if so, try to put a PTE */ - li r3,0 /* into the hash table */ - mr r4,r12 /* SRR0 is fault address */ -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - bl hash_page -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ) /* Alignment exception */ - . = 0x600 - DO_KVM 0x600 -Alignment: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - addi r3,r1,STACK_FRAME_OVERHEAD - b alignment_exception_tramp + START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment) + EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1 + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck) + EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return /* Floating-point unavailable */ - . 
= 0x800 - DO_KVM 0x800 -FPUnavailable: + START_EXCEPTION(0x800, FPUnavailable) #ifdef CONFIG_PPC_FPU BEGIN_FTR_SECTION /* @@ -397,30 +368,29 @@ BEGIN_FTR_SECTION */ b ProgramCheck END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) - EXCEPTION_PROLOG + EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable beq 1f bl load_up_fpu /* if from user, just load it up */ b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +1: prepare_transfer_to_handler + bl kernel_fp_unavailable_exception + b interrupt_return #else b ProgramCheck #endif /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xa00, Trap_0a, unknown_exception) + EXCEPTION(0xb00, Trap_0b, unknown_exception) /* System call */ - . = 0xc00 - DO_KVM 0xc00 -SystemCall: - SYSCALL_ENTRY 0xc00 + START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall) + SYSCALL_ENTRY INTERRUPT_SYSCALL - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception) + EXCEPTION(0xe00, Trap_0e, unknown_exception) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -430,19 +400,18 @@ SystemCall: * non-altivec kernel running on a machine with altivec just * by executing an altivec instruction. */ - . = 0xf00 - DO_KVM 0xf00 + START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap) b PerformanceMonitor - . = 0xf20 - DO_KVM 0xf20 + START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap) b AltiVecUnavailable + __HEAD /* * Handle TLB miss for instruction on 603/603e. * Note: we get an alternate set of r0 - r3 to use automatically. */ - . = 0x1000 + . 
= INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: /* * r0: scratch @@ -508,7 +477,7 @@ InstructionAddressInvalid: /* * Handle TLB miss for DATA Load operation on 603/603e */ - . = 0x1100 + . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: /* * r0: scratch @@ -586,7 +555,7 @@ DataAddressInvalid: /* * Handle TLB miss for DATA Store on 603/603e */ - . = 0x1200 + . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: /* * r0: scratch @@ -650,57 +619,39 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define TAUException unknown_async_exception #endif - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) - 
EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception) + EXCEPTION(0x1400, SMI, SMIException) + EXCEPTION(0x1500, Trap_15, unknown_exception) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception) + EXCEPTION(0x1700, Trap_17, TAUException) + EXCEPTION(0x1800, Trap_18, unknown_exception) + EXCEPTION(0x1900, Trap_19, unknown_exception) + EXCEPTION(0x1a00, Trap_1a, unknown_exception) + EXCEPTION(0x1b00, Trap_1b, unknown_exception) + EXCEPTION(0x1c00, Trap_1c, unknown_exception) + EXCEPTION(0x1d00, Trap_1d, unknown_exception) + EXCEPTION(0x1e00, Trap_1e, unknown_exception) + EXCEPTION(0x1f00, Trap_1f, unknown_exception) + EXCEPTION(0x2000, RunMode, RunModeException) + EXCEPTION(0x2100, Trap_21, unknown_exception) + EXCEPTION(0x2200, Trap_22, unknown_exception) + EXCEPTION(0x2300, Trap_23, unknown_exception) + EXCEPTION(0x2400, Trap_24, unknown_exception) + EXCEPTION(0x2500, Trap_25, unknown_exception) + EXCEPTION(0x2600, Trap_26, unknown_exception) + EXCEPTION(0x2700, Trap_27, unknown_exception) + EXCEPTION(0x2800, Trap_28, unknown_exception) + EXCEPTION(0x2900, Trap_29, unknown_exception) + EXCEPTION(0x2a00, Trap_2a, unknown_exception) + EXCEPTION(0x2b00, Trap_2b, unknown_exception) + EXCEPTION(0x2c00, Trap_2c, unknown_exception) + EXCEPTION(0x2d00, Trap_2d, unknown_exception) + EXCEPTION(0x2e00, Trap_2e, unknown_exception) + EXCEPTION(0x2f00, Trap_2f, unknown_exception) + __HEAD . 
= 0x3000 -machine_check_tramp: - EXC_XFER_STD(0x200, machine_check_exception) - -alignment_exception_tramp: - EXC_XFER_STD(0x600, alignment_exception) - -handle_page_fault_tramp_1: -#ifdef CONFIG_VMAP_STACK - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 -#endif - lwz r5, _DSISR(r11) - /* fall through */ -handle_page_fault_tramp_2: - andis. r0, r5, DSISR_DABRMATCH@h - bne- 1f - EXC_XFER_LITE(0x300, handle_page_fault) -1: EXC_XFER_STD(0x300, do_break) - -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) @@ -775,26 +726,31 @@ fast_hash_page_return: rfi #endif /* CONFIG_PPC_BOOK3S_604 */ -stack_overflow: +#ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception #endif + __HEAD AltiVecUnavailable: - EXCEPTION_PROLOG + EXCEPTION_PROLOG 0xf20 AltiVecUnavailable #ifdef CONFIG_ALTIVEC beq 1f bl load_up_altivec /* if from user, just load it up */ b fast_exception_return #endif /* CONFIG_ALTIVEC */ -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0xf20, altivec_unavailable_exception) +1: prepare_transfer_to_handler + bl altivec_unavailable_exception + b interrupt_return + __HEAD PerformanceMonitor: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0xf00, performance_monitor_exception) + EXCEPTION_PROLOG 0xf00 PerformanceMonitor + prepare_transfer_to_handler + bl performance_monitor_exception + b interrupt_return + __HEAD /* * This code is jumped to from the startup code to copy * the kernel image to physical address PHYSICAL_START. 
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 47857795f50a..f82470091697 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -44,7 +44,7 @@ END_BTB_FLUSH_SECTION #endif -#define NORMAL_EXCEPTION_PROLOG(intno) \ +#define NORMAL_EXCEPTION_PROLOG(trapno, intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ stw r11, THREAD_NORMSAVE(0)(r10); \ @@ -53,6 +53,8 @@ END_BTB_FLUSH_SECTION mfspr r11, SPRN_SRR1; \ DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \ andi. r11, r11, MSR_PR; /* check whether user or kernel */\ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL); \ + mtmsr r11; \ mr r11, r1; \ beq 1f; \ BOOKE_CLEAR_BTB(r11) \ @@ -76,12 +78,39 @@ END_BTB_FLUSH_SECTION stw r1, 0(r11); \ mr r1, r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) + lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addi r10, r10, STACK_FRAME_REGS_MARKER@l + stw r10, 8(r1) + li r10, \trapno + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) + SAVE_NVGPRS(r1) + stw r2,GPR2(r1) + stw r12,_NIP(r1) + stw r9,_MSR(r1) + mfctr r10 + mfspr r2,SPRN_SPRG_THREAD + stw r10,_CTR(r1) + tovirt(r2, r2) + mfspr r10,SPRN_XER + addi r2, r2, -THREAD + stw r10,_XER(r1) + addi r3,r1,STACK_FRAME_OVERHEAD +.endm + +.macro prepare_transfer_to_handler +#ifdef CONFIG_E500 + andi. r12,r9,MSR_PR + bne 777f + bl prepare_transfer_to_handler +777: +#endif +.endm .macro SYSCALL_ENTRY trapno intno srr1 mfspr r10, SPRN_SPRG_THREAD @@ -180,7 +209,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * registers as the normal prolog above. 
Instead we use a portion of the * critical/machine check exception stack at low physical addresses. */ -#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ stw r9,GPR9(r8); /* save various registers */\ @@ -192,6 +221,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ BOOKE_CLEAR_BTB(r10) \ andi. r11,r11,MSR_PR; \ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)); \ + mtmsr r11; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ @@ -221,16 +252,44 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r1,0(r11); \ mr r1,r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) 
*/\ - stw r0,GPR0(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno + +#define SAVE_xSRR(xSRR) \ + mfspr r0,SPRN_##xSRR##0; \ + stw r0,_##xSRR##0(r1); \ + mfspr r0,SPRN_##xSRR##1; \ + stw r0,_##xSRR##1(r1) + + +.macro SAVE_MMU_REGS +#ifdef CONFIG_PPC_BOOK3E_MMU + mfspr r0,SPRN_MAS0 + stw r0,MAS0(r1) + mfspr r0,SPRN_MAS1 + stw r0,MAS1(r1) + mfspr r0,SPRN_MAS2 + stw r0,MAS2(r1) + mfspr r0,SPRN_MAS3 + stw r0,MAS3(r1) + mfspr r0,SPRN_MAS6 + stw r0,MAS6(r1) +#ifdef CONFIG_PHYS_64BIT + mfspr r0,SPRN_MAS7 + stw r0,MAS7(r1) +#endif /* CONFIG_PHYS_64BIT */ +#endif /* CONFIG_PPC_BOOK3E_MMU */ +#ifdef CONFIG_44x + mfspr r0,SPRN_MMUCR + stw r0,MMUCR(r1) +#endif +.endm -#define CRITICAL_EXCEPTION_PROLOG(intno) \ - EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) -#define DEBUG_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1) -#define MCHECK_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \ +#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1) +#define DEBUG_EXCEPTION_PROLOG(trapno) \ + EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1) +#define MCHECK_EXCEPTION_PROLOG(trapno) \ + EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \ SPRN_MCSRR0, SPRN_MCSRR1) /* @@ -257,44 +316,34 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) .align 5; \ label: -#define EXCEPTION(n, intno, label, hdlr, xfer) \ +#define EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ - NORMAL_EXCEPTION_PROLOG(intno); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) + NORMAL_EXCEPTION_PROLOG(n, intno); \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b interrupt_return #define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ - CRITICAL_EXCEPTION_PROLOG(intno); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ 
- crit_transfer_to_handler, ret_from_crit_exc) + CRITICAL_EXCEPTION_PROLOG(n, intno); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_crit_exc #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ - MCHECK_EXCEPTION_PROLOG; \ + MCHECK_EXCEPTION_PROLOG(n); \ mfspr r5,SPRN_ESR; \ stw r5,_ESR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - mcheck_transfer_to_handler, ret_from_mcheck_exc) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - lis r10,msr@h; \ - ori r10,r10,msr@l; \ - bl tfer; \ - .long hdlr; \ - .long ret - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ - ret_from_except) + SAVE_xSRR(DSRR); \ + SAVE_xSRR(CSRR); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_mcheck_exc /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case @@ -311,7 +360,7 @@ label: */ #define DEBUG_DEBUG_EXCEPTION \ START_EXCEPTION(DebugDebug); \ - DEBUG_EXCEPTION_PROLOG; \ + DEBUG_EXCEPTION_PROLOG(2000); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -360,12 +409,16 @@ label: /* continue normal handling for a debug exception... 
*/ \ 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) + SAVE_xSRR(CSRR); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl DebugException; \ + b ret_from_debug_exc #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ - CRITICAL_EXCEPTION_PROLOG(DEBUG); \ + CRITICAL_EXCEPTION_PROLOG(2000,DEBUG); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -414,58 +467,71 @@ label: /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl DebugException; \ + b ret_from_crit_exc #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ - NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \ + NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ stw r4, _DEAR(r11); \ - EXC_XFER_LITE(0x0300, handle_page_fault) + prepare_transfer_to_handler; \ + bl do_page_fault; \ + b interrupt_return #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ - NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ + NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ - EXC_XFER_LITE(0x0400, handle_page_fault) + prepare_transfer_to_handler; \ + bl do_page_fault; \ + b interrupt_return #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ - NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \ + 
NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x0600, alignment_exception) + prepare_transfer_to_handler; \ + bl alignment_exception; \ + REST_NVGPRS(r1); \ + b interrupt_return #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ - NORMAL_EXCEPTION_PROLOG(PROGRAM); \ + NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x0700, program_check_exception) + prepare_transfer_to_handler; \ + bl program_check_exception; \ + REST_NVGPRS(r1); \ + b interrupt_return #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ - NORMAL_EXCEPTION_PROLOG(DECREMENTER); \ + NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_LITE(0x0900, timer_interrupt) + prepare_transfer_to_handler; \ + bl timer_interrupt; \ + b interrupt_return #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ - NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \ + NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL); \ beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ -1: addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) +1: prepare_transfer_to_handler; \ + bl kernel_fp_unavailable_exception; \ + b interrupt_return #else /* __ASSEMBLY__ */ struct exception_regs { @@ -481,7 +547,6 @@ struct exception_regs { unsigned long csrr1; unsigned long dsrr0; unsigned long dsrr1; - unsigned long saved_ksp_limit; }; /* ensure this structure is always sized to a multiple of the stack alignment */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 3f4a40cccef5..a1a5c3f10dc4 100644 --- 
a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -113,7 +113,7 @@ _ENTRY(_start); 1: /* - * We have the runtime (virutal) address of our base. + * We have the runtime (virtual) address of our base. * We calculate our shift of offset from a 64M page. * We could map the 64M page we belong to at PAGE_OFFSET and * get going from there. @@ -363,23 +363,26 @@ interrupt_base: /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) - NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) + NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE) mfspr r5,SPRN_ESR /* Grab the ESR, save it */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */ stw r4, _DEAR(r11) andis. r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_LITE(0x0300, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return 1: - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x0300, CacheLockingException) + prepare_transfer_to_handler + bl CacheLockingException + b interrupt_return /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -391,8 +394,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) #endif /* System Call Interrupt */ @@ -400,16 +402,14 @@ interrupt_base: SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1 /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - 
EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT @@ -497,7 +497,7 @@ END_BTB_FLUSH_SECTION #endif #endif - bne 2f /* Bail if permission/valid mismach */ + bne 2f /* Bail if permission/valid mismatch */ /* Jump to common tlb load */ b finish_tlb_load @@ -592,7 +592,7 @@ END_BTB_FLUSH_SECTION #endif #endif - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Jump to common TLB load point */ b finish_tlb_load @@ -614,38 +614,44 @@ END_BTB_FLUSH_SECTION #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x2010, KernelSPE) +1: prepare_transfer_to_handler + bl KernelSPE + b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception) #endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, - SPEFloatingPointException, EXC_XFER_STD) + START_EXCEPTION(SPEFloatingPointData) + NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA) + prepare_transfer_to_handler + bl SPEFloatingPointException + REST_NVGPRS(r1) + b interrupt_return /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - SPEFloatingPointRoundException, EXC_XFER_STD) + START_EXCEPTION(SPEFloatingPointRound) + NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND) + prepare_transfer_to_handler + bl SPEFloatingPointRoundException + REST_NVGPRS(r1) + b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, - unknown_exception, EXC_XFER_STD) - 
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception) #endif /* CONFIG_SPE_POSSIBLE */ /* Performance Monitor */ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ - performance_monitor_exception, EXC_XFER_STD) + performance_monitor_exception) - EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD) + EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception) CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \ CriticalDoorbell, unknown_exception) @@ -660,10 +666,10 @@ END_BTB_FLUSH_SECTION unknown_exception) /* Hypercall */ - EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD) + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception) /* Embedded Hypervisor Privilege */ - EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD) + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception) interrupt_end: @@ -854,7 +860,7 @@ KernelSPE: lwz r5,_NIP(r1) bl printk #endif - b ret_from_except + b interrupt_return #ifdef CONFIG_PRINTK 87: .string "SPE used in kernel (task=%p, pc=%x) \n" #endif diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 867ee4aa026a..675d1f66ab72 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -141,7 +141,7 @@ void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, { struct instruction_op op; - if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + if (__get_user_instr(*instr, (void __user *)regs->nip)) return; analyse_instr(&op, regs, *instr); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 69df840f7253..13cad9297d82 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -145,9 +145,7 @@ 
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * we are called with DR/IR still off and r2 containing physical - * address of current. R11 points to the exception frame (physical - * address). We have to preserve r10. + * R11 points to the exception frame. We have to preserve r10. */ _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */ @@ -166,11 +164,7 @@ BEGIN_FTR_SECTION mfspr r9,SPRN_HID0 andis. r9,r9,HID0_NAP@h beq 1f -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_msscr0@ha -#else - addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha -#endif lwz r9,nap_save_msscr0@l(r9) mtspr SPRN_MSSCR0, r9 sync @@ -178,15 +172,11 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) BEGIN_FTR_SECTION -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_hid1@ha -#else - addis r9,r11,(nap_save_hid1-KERNELBASE)@ha -#endif lwz r9,nap_save_hid1@l(r9) mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) - b transfer_to_handler_cont + blr _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) .data diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index f9e6d83e6720..abb719b21cae 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -209,4 +209,8 @@ _GLOBAL(power4_idle_nap) mtmsrd r7 isync b 1b + + .globl power4_idle_nap_return +power4_idle_nap_return: + blr #endif diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 72c85b6f3898..9e1bc4502c50 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -74,20 +74,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * r2 containing physical address of current. - * r11 points to the exception frame (physical address). + * r2 containing address of current. + * r11 points to the exception frame. * We have to preserve r10. 
*/ _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in e500_idle */ stw r9,_NIP(r11) /* make it do a blr */ - -#ifdef CONFIG_SMP - lwz r11,TASK_CPU(r2) /* get cpu number * 4 */ - slwi r11,r11,2 -#else - li r11,0 -#endif - - b transfer_to_handler_cont + blr _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index c475a229a42a..e4559f8914eb 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -20,6 +20,10 @@ #include <asm/time.h> #include <asm/unistd.h> +#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32) +unsigned long global_dbcr0[NR_CPUS]; +#endif + typedef long (*syscall_fn)(long, long, long, long, long, long); /* Has to run notrace because it is entered not completely "reconciled" */ @@ -29,20 +33,24 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; + kuep_lock(); +#ifdef CONFIG_PPC32 + kuap_save_and_lock(regs); +#endif + regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + trace_hardirqs_off(); /* finish reconciling */ + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); user_exit_irqoff(); - trace_hardirqs_off(); /* finish reconciling */ - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY @@ -69,9 +77,7 @@ notrace long system_call_exception(long r3, long r4, long r5, isync(); } else #endif -#ifdef CONFIG_PPC64 - kuap_check_amr(); -#endif + kuap_assert_locked(); booke_restore_dbcr0(); @@ -247,9 +253,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, CT_WARN_ON(ct_state() == CONTEXT_USER); -#ifdef CONFIG_PPC64 - kuap_check_amr(); -#endif + kuap_assert_locked(); regs->result = r3; @@ -344,16 +348,13 @@ again: account_cpu_user_exit(); -#ifdef CONFIG_PPC_BOOK3S_64 /* 
BOOK3E and ppc32 not using this */ - /* - * We do this at the end so that we do context switch with KERNEL AMR - */ + /* Restore user access locks last */ kuap_user_restore(regs); -#endif + kuep_unlock(); + return ret; } -#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { unsigned long ti_flags; @@ -363,7 +364,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -371,9 +371,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned * We don't need to restore AMR on the way back to userspace for KUAP. * AMR can only have been unlocked if we interrupted the kernel. */ -#ifdef CONFIG_PPC64 - kuap_check_amr(); -#endif + kuap_assert_locked(); local_irq_save(flags); @@ -392,7 +390,7 @@ again: ti_flags = READ_ONCE(current_thread_info()->flags); } - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && unlikely((ti_flags & _TIF_RESTORE_TM))) { restore_tm_state(regs); @@ -427,12 +425,9 @@ again: account_cpu_user_exit(); - /* - * We do this at the end so that we do context switch with KERNEL AMR - */ -#ifdef CONFIG_PPC64 + /* Restore user access locks last */ kuap_user_restore(regs); -#endif + return ret; } @@ -442,25 +437,20 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign { unsigned long flags; unsigned long ret = 0; -#ifdef CONFIG_PPC64 - unsigned long amr; -#endif + unsigned long kuap; if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); 
BUG_ON(regs->msr & MSR_PR); - BUG_ON(!FULL_REGS(regs)); /* * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() == CONTEXT_USER); -#ifdef CONFIG_PPC64 - amr = kuap_get_and_check_amr(); -#endif + kuap = kuap_get_and_assert_locked(); if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags); @@ -498,14 +488,11 @@ again: #endif /* - * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr, - * which would cause Read-After-Write stalls. Hence, we take the AMR - * value from the check above. + * 64s does not want to mfspr(SPRN_AMR) here, because this comes after + * mtmsr, which would cause Read-After-Write stalls. Hence, take the + * AMR value from the check above. */ -#ifdef CONFIG_PPC64 - kuap_kernel_restore(regs, amr); -#endif + kuap_kernel_restore(regs, kuap); return ret; } -#endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c00214a4355c..57d6b85e9b96 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -72,8 +72,7 @@ static void iommu_debugfs_del(struct iommu_table *tbl) sprintf(name, "%08lx", tbl->it_index); liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - if (liobn_entry) - debugfs_remove(liobn_entry); + debugfs_remove(liobn_entry); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} @@ -297,6 +296,15 @@ again: pass++; goto again; + } else if (pass == tbl->nr_pools + 1) { + /* Last resort: try largepool */ + spin_unlock(&pool->lock); + pool = &tbl->large_pool; + spin_lock(&pool->lock); + pool->hint = pool->start; + pass++; + goto again; + } else { /* Give up */ spin_unlock_irqrestore(&(pool->lock), flags); @@ -719,7 +727,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, { unsigned long sz; static int welcomed = 0; - struct page *page; unsigned int i; struct 
iommu_pool *p; @@ -728,11 +735,11 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); - if (!page) - panic("iommu_init_table: Can't allocate %ld bytes\n", sz); - tbl->it_map = page_address(page); - memset(tbl->it_map, 0, sz); + tbl->it_map = vzalloc_node(sz, nid); + if (!tbl->it_map) { + pr_err("%s: Can't allocate %ld bytes\n", __func__, sz); + return NULL; + } iommu_table_reserve_pages(tbl, res_start, res_end); @@ -774,8 +781,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, static void iommu_table_free(struct kref *kref) { - unsigned long bitmap_sz; - unsigned int order; struct iommu_table *tbl; tbl = container_of(kref, struct iommu_table, it_kref); @@ -796,12 +801,8 @@ static void iommu_table_free(struct kref *kref) if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs\n", __func__); - /* calculate bitmap size in bytes */ - bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - /* free bitmap */ - order = get_order(bitmap_sz); - free_pages((unsigned long) tbl->it_map, order); + vfree(tbl->it_map); /* free table */ kfree(tbl); @@ -897,6 +898,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; + size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -923,8 +925,9 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> tbl->it_page_shift; - io_order = get_iommu_order(size, tbl); + size_io = IOMMU_PAGE_ALIGN(size_io, tbl); + nio_pages = size_io >> tbl->it_page_shift; + io_order = get_iommu_order(size_io, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, 
io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -939,10 +942,9 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - unsigned int nio_pages; + size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); + unsigned int nio_pages = size_io >> tbl->it_page_shift; - size = PAGE_ALIGN(size); - nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -1096,7 +1098,7 @@ int iommu_take_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); iommu_table_release_pages(tbl); @@ -1124,7 +1126,7 @@ void iommu_release_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); memset(tbl->it_map, 0, sz); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index d71fd10a1dd4..72cb45393ef2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,82 +104,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -#ifdef CONFIG_PPC_BOOK3E - -/* This is called whenever we are re-enabling interrupts - * and returns either 0 (nothing to do) or 500/900/280 if - * there's an EE, DEC or DBELL to generate. - * - * This is called in two contexts: From arch_local_irq_restore() - * before soft-enabling interrupts, and from the exception exit - * path when returning from an interrupt from a soft-disabled to - * a soft enabled context. In both case we have interrupts hard - * disabled. - * - * We take care of only clearing the bits we handled in the - * PACA irq_happened field since we can only re-emit one at a - * time and we don't want to "lose" one. 
- */ -notrace unsigned int __check_irq_replay(void) -{ - /* - * We use local_paca rather than get_paca() to avoid all - * the debug_smp_processor_id() business in this low level - * function - */ - unsigned char happened = local_paca->irq_happened; - - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); - - if (happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; - return 0x900; - } - - if (happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; - return 0x500; - } - - if (happened & PACA_IRQ_DBELL) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - return 0x280; - } - - if (happened & PACA_IRQ_HARD_DIS) - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* There should be nothing left ! */ - BUG_ON(local_paca->irq_happened != 0); - - return 0; -} - -/* - * This is specifically called by assembly code to re-enable interrupts - * if they are currently disabled. This is typically called before - * schedule() or do_signal() when returning to userspace. We do it - * in C to avoid the burden of dealing with lockdep etc... - * - * NOTE: This is called with interrupts hard disabled but not marked - * as such in paca->irq_happened, so we need to resync this. 
- */ -void notrace restore_interrupts(void) -{ - if (irqs_disabled()) { - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - local_irq_enable(); - } else - __hard_irq_enable(); -} - -#endif /* CONFIG_PPC_BOOK3E */ - void replay_soft_interrupts(void) { struct pt_regs regs; @@ -218,7 +142,7 @@ again: */ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { local_paca->irq_happened &= ~PACA_IRQ_HMI; - regs.trap = 0xe60; + regs.trap = INTERRUPT_HMI; handle_hmi_exception(&regs); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -226,7 +150,7 @@ again: if (local_paca->irq_happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; - regs.trap = 0x900; + regs.trap = INTERRUPT_DECREMENTER; timer_interrupt(&regs); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -234,7 +158,7 @@ again: if (local_paca->irq_happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; - regs.trap = 0x500; + regs.trap = INTERRUPT_EXTERNAL; do_IRQ(&regs); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -242,10 +166,7 @@ again: if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; - if (IS_ENABLED(CONFIG_PPC_BOOK3E)) - regs.trap = 0x280; - else - regs.trap = 0xa00; + regs.trap = INTERRUPT_DOORBELL; doorbell_exception(&regs); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -254,7 +175,7 @@ again: /* Book3E does not support soft-masking PMI interrupts */ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { local_paca->irq_happened &= ~PACA_IRQ_PMI; - regs.trap = 0xf00; + regs.trap = INTERRUPT_PERFMON; performance_monitor_exception(&regs); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -282,7 +203,7 @@ static inline void replay_soft_interrupts_irqrestore(void) * and re-locking AMR but we shouldn't get here in the first place, * 
hence the warning. */ - kuap_check_amr(); + kuap_assert_locked(); if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(AMR_KUAP_BLOCKED); @@ -667,6 +588,47 @@ static inline void check_stack_overflow(void) } } +static __always_inline void call_do_softirq(const void *sp) +{ + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_softirq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_irq(struct pt_regs *regs) { unsigned int irq; diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 144858027fa3..ce87dc5ea23c 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,10 +11,10 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code; + struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry); if (type == JUMP_LABEL_JMP) - patch_branch(addr, entry->target, 0); + patch_branch(addr, jump_entry_target(entry), 
0); else patch_instruction(addr, ppc_inst(PPC_INST_NOP)); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 409080208a6c..7dd2ad3603ad 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -376,7 +376,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) } /* - * This function does PowerPC specific procesing for interfacing to gdb. + * This function does PowerPC specific processing for interfacing to gdb. */ int kgdb_arch_handle_exception(int vector, int signo, int err_code, char *remcom_in_buffer, char *remcom_out_buffer, diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index f061e06e9f51..8b2c1a8553a0 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -15,6 +15,7 @@ #include <asm/udbg.h> #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> +#include <asm/early_ioremap.h> #undef DEBUG @@ -34,6 +35,7 @@ static struct legacy_serial_info { unsigned int clock; int irq_check_parent; phys_addr_t taddr; + void __iomem *early_addr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; static const struct of_device_id legacy_serial_parents[] __initconst = { @@ -325,17 +327,16 @@ static void __init setup_legacy_serial_console(int console) { struct legacy_serial_info *info = &legacy_serial_infos[console]; struct plat_serial8250_port *port = &legacy_serial_ports[console]; - void __iomem *addr; unsigned int stride; stride = 1 << port->regshift; /* Check if a translated MMIO address has been found */ if (info->taddr) { - addr = ioremap(info->taddr, 0x1000); - if (addr == NULL) + info->early_addr = early_ioremap(info->taddr, 0x1000); + if (info->early_addr == NULL) return; - udbg_uart_init_mmio(addr, stride); + udbg_uart_init_mmio(info->early_addr, stride); } else { /* Check if it's PIO and we support untranslated PIO */ if (port->iotype == UPIO_PORT && isa_io_special) @@ -353,6 +354,30 @@ static void __init setup_legacy_serial_console(int 
console) udbg_uart_setup(info->speed, info->clock); } +static int __init ioremap_legacy_serial_console(void) +{ + struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console]; + struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console]; + void __iomem *vaddr; + + if (legacy_serial_console < 0) + return 0; + + if (!info->early_addr) + return 0; + + vaddr = ioremap(info->taddr, 0x1000); + if (WARN_ON(!vaddr)) + return -ENOMEM; + + udbg_uart_init_mmio(vaddr, 1 << port->regshift); + early_iounmap(info->early_addr, 0x1000); + info->early_addr = NULL; + + return 0; +} +early_initcall(ioremap_legacy_serial_console); + /* * This is called very early, as part of setup_system() or eventually * setup_arch(), basically before anything else in this file. This function diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 11f0cae086ed..9a3c2a84a2ac 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -40,7 +40,7 @@ static struct irq_work mce_ue_event_irq_work = { .func = machine_check_ue_irq_work, }; -DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); @@ -131,6 +131,8 @@ void save_mce_event(struct pt_regs *regs, long handled, * Populate the mce error_type and type-specific error_type. 
*/ mce_set_error_info(mce, mce_err); + if (mce->error_type == MCE_ERROR_TYPE_UE) + mce->u.ue_error.ignore_event = mce_err->ignore_event; if (!addr) return; @@ -159,7 +161,6 @@ void save_mce_event(struct pt_regs *regs, long handled, if (phys_addr != ULONG_MAX) { mce->u.ue_error.physical_address_provided = true; mce->u.ue_error.physical_address = phys_addr; - mce->u.ue_error.ignore_event = mce_err->ignore_event; machine_check_ue_event(mce); } } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 717e658b90fd..6a076bef2932 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -28,45 +28,6 @@ .text /* - * We store the saved ksp_limit in the unused part - * of the STACK_FRAME_OVERHEAD - */ -_GLOBAL(call_do_softirq) - mflr r0 - stw r0,4(r1) - lwz r10,THREAD+KSP_LIMIT(r2) - stw r3, THREAD+KSP_LIMIT(r2) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - stw r10,8(r1) - bl __do_softirq - lwz r10,8(r1) - lwz r1,0(r1) - lwz r0,4(r1) - stw r10,THREAD+KSP_LIMIT(r2) - mtlr r0 - blr - -/* - * void call_do_irq(struct pt_regs *regs, void *sp); - */ -_GLOBAL(call_do_irq) - mflr r0 - stw r0,4(r1) - lwz r10,THREAD+KSP_LIMIT(r2) - stw r4, THREAD+KSP_LIMIT(r2) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - stw r10,8(r1) - bl __do_irq - lwz r10,8(r1) - lwz r1,0(r1) - lwz r0,4(r1) - stw r10,THREAD+KSP_LIMIT(r2) - mtlr r0 - blr - -/* * This returns the high 64 bits of the product of two 64-bit numbers. 
*/ _GLOBAL(mulhdu) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 070465825c21..4b761a18a74d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,28 +27,6 @@ .text -_GLOBAL(call_do_softirq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - bl __do_softirq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - -_GLOBAL(call_do_irq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - bl __do_irq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - _GLOBAL(__bswapdi2) EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index a211b0253cdb..fab84024650c 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -14,6 +14,7 @@ #include <asm/firmware.h> #include <linux/sort.h> #include <asm/setup.h> +#include <asm/sections.h> static LIST_HEAD(module_bug_list); @@ -88,12 +89,28 @@ int module_finalize(const Elf_Ehdr *hdr, } #ifdef MODULES_VADDR +static __always_inline void * +__module_alloc(unsigned long size, unsigned long start, unsigned long end) +{ + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, + PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} + void *module_alloc(unsigned long size) { + unsigned long limit = (unsigned long)_etext - SZ_32M; + void *ptr = NULL; + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); + /* First try within 32M limit from _etext to avoid branch trampolines */ + if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) + ptr = __module_alloc(size, limit, MODULES_END); + + if (!ptr) + ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); + + return ptr; } #endif diff --git a/arch/powerpc/kernel/optprobes.c 
b/arch/powerpc/kernel/optprobes.c index 7f7cdbeacd1a..cdf87086fa33 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -141,11 +141,21 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) } } +static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); + addr++; + + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); +} + /* * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. */ -static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, @@ -177,6 +187,14 @@ static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t * ___PPC_RS(reg) | (val & 0xffff))); } +static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + if (IS_ENABLED(CONFIG_PPC64)) + patch_imm64_load_insns(val, reg, addr); + else + patch_imm32_load_insns(val, reg, addr); +} + int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; @@ -230,7 +248,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); + patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. branch to optimized_callback() and emulate_step() @@ -264,7 +282,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * 3. 
load instruction to be emulated into relevant register, and */ temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index ff8ba4d3824d..19ea3312403c 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -9,6 +9,16 @@ #include <asm/ptrace.h> #include <asm/asm-offsets.h> +#ifdef CONFIG_PPC64 +#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) +#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop +#else +#define SAVE_30GPRS(base) stmw r2, GPR2(base) +#define REST_30GPRS(base) lmw r2, GPR2(base) +#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop +#endif + #define OPT_SLOT_SIZE 65536 .balign 4 @@ -30,39 +40,41 @@ optinsn_slot: .global optprobe_template_entry optprobe_template_entry: /* Create an in-memory pt_regs */ - stdu r1,-INT_FRAME_SIZE(r1) + PPC_STLU r1,-INT_FRAME_SIZE(r1) SAVE_GPR(0,r1) /* Save the previous SP into stack */ addi r0,r1,INT_FRAME_SIZE - std r0,GPR1(r1) - SAVE_10GPRS(2,r1) - SAVE_10GPRS(12,r1) - SAVE_10GPRS(22,r1) + PPC_STL r0,GPR1(r1) + SAVE_30GPRS(r1) /* Save SPRS */ mfmsr r5 - std r5,_MSR(r1) + PPC_STL r5,_MSR(r1) li r5,0x700 - std r5,_TRAP(r1) + PPC_STL r5,_TRAP(r1) li r5,0 - std r5,ORIG_GPR3(r1) - std r5,RESULT(r1) + PPC_STL r5,ORIG_GPR3(r1) + PPC_STL r5,RESULT(r1) mfctr r5 - std r5,_CTR(r1) + PPC_STL r5,_CTR(r1) mflr r5 - std r5,_LINK(r1) + PPC_STL r5,_LINK(r1) mfspr r5,SPRN_XER - std r5,_XER(r1) + PPC_STL r5,_XER(r1) mfcr r5 - std r5,_CCR(r1) + PPC_STL r5,_CCR(r1) +#ifdef CONFIG_PPC64 lbz r5,PACAIRQSOFTMASK(r13) std r5,SOFTE(r1) +#endif /* * We may get here from a module, so load the kernel TOC 
in r2. * The original TOC gets restored when pt_regs is restored * further below. */ +#ifdef CONFIG_PPC64 ld r2,PACATOC(r13) +#endif .global optprobe_template_op_address optprobe_template_op_address: @@ -70,11 +82,8 @@ optprobe_template_op_address: * Parameters to optimized_callback(): * 1. optimized_kprobe structure in r3 */ - nop - nop - nop - nop - nop + TEMPLATE_FOR_IMM_LOAD_INSNS + /* 2. pt_regs pointer in r4 */ addi r4,r1,STACK_FRAME_OVERHEAD @@ -92,11 +101,7 @@ optprobe_template_call_handler: .global optprobe_template_insn optprobe_template_insn: /* 2, Pass instruction to be emulated in r4 */ - nop - nop - nop - nop - nop + TEMPLATE_FOR_IMM_LOAD_INSNS .global optprobe_template_call_emulate optprobe_template_call_emulate: @@ -107,20 +112,18 @@ optprobe_template_call_emulate: * All done. * Now, restore the registers... */ - ld r5,_MSR(r1) + PPC_LL r5,_MSR(r1) mtmsr r5 - ld r5,_CTR(r1) + PPC_LL r5,_CTR(r1) mtctr r5 - ld r5,_LINK(r1) + PPC_LL r5,_LINK(r1) mtlr r5 - ld r5,_XER(r1) + PPC_LL r5,_XER(r1) mtxer r5 - ld r5,_CCR(r1) + PPC_LL r5,_CCR(r1) mtcr r5 REST_GPR(0,r1) - REST_10GPRS(2,r1) - REST_10GPRS(12,r1) - REST_10GPRS(22,r1) + REST_30GPRS(r1) /* Restore the previous SP */ addi r1,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3231c2df9e26..89e34aa273e2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1117,9 +1117,10 @@ void restore_tm_state(struct pt_regs *regs) regs->msr |= msr_diff; } -#else +#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ #define tm_recheckpoint_new_task(new) #define __switch_to_tm(prev, new) +void tm_reclaim_current(uint8_t cause) {} #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ static inline void save_sprs(struct thread_struct *t) @@ -1255,6 +1256,9 @@ struct task_struct *__switch_to(struct task_struct *prev, */ restore_sprs(old_thread, new_thread); +#ifdef CONFIG_PPC32 + kuap_assert_locked(); +#endif last = _switch(old_thread, new_thread); #ifdef 
CONFIG_PPC_BOOK3S_64 @@ -1444,11 +1448,9 @@ static void print_msr_bits(unsigned long val) #ifdef CONFIG_PPC64 #define REG "%016lx" #define REGS_PER_LINE 4 -#define LAST_VOLATILE 13 #else #define REG "%08lx" #define REGS_PER_LINE 8 -#define LAST_VOLATILE 12 #endif static void __show_regs(struct pt_regs *regs) @@ -1465,7 +1467,9 @@ static void __show_regs(struct pt_regs *regs) trap = TRAP(regs); if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); - if (trap == 0x200 || trap == 0x300 || trap == 0x600) { + if (trap == INTERRUPT_MACHINE_CHECK || + trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); else @@ -1484,8 +1488,6 @@ static void __show_regs(struct pt_regs *regs) if ((i % REGS_PER_LINE) == 0) pr_cont("\nGPR%02d: ", i); pr_cont(REG " ", regs->gpr[i]); - if (i == LAST_VOLATILE && !FULL_REGS(regs)) - break; } pr_cont("\n"); /* @@ -1688,7 +1690,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, } else { /* user thread */ struct pt_regs *regs = current_pt_regs(); - CHECK_FULL_REGS(regs); *childregs = *regs; if (usp) childregs->gpr[1] = usp; @@ -1724,9 +1725,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; -#ifdef CONFIG_PPC32 - p->thread.ksp_limit = (unsigned long)end_of_stack(p); -#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT for (i = 0; i < nr_wp_slots(); i++) p->thread.ptrace_bps[i] = NULL; @@ -1796,13 +1794,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->ccr = 0; regs->gpr[1] = sp; - /* - * We have just cleared all the nonvolatile GPRs, so make - * FULL_REGS(regs) return true. This is necessary to allow - * ptrace to examine the thread immediately after exec. 
- */ - SET_FULL_REGS(regs); - #ifdef CONFIG_PPC32 regs->mq = 0; regs->nip = start; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9a4797d1d40d..fbe9deebc8e1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -65,6 +65,8 @@ #define DBG(fmt...) #endif +int *chip_id_lookup_table; + #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; @@ -267,7 +269,7 @@ static struct feature_property { }; #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU) -static inline void identical_pvr_fixup(unsigned long node) +static __init void identical_pvr_fixup(unsigned long node) { unsigned int pvr; const char *model = of_get_flat_dt_prop(node, "model", NULL); @@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id); int cpu_to_chip_id(int cpu) { struct device_node *np; + int ret = -1, idx; + + idx = cpu / threads_per_core; + if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1) + return chip_id_lookup_table[idx]; np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; + if (np) { + ret = of_get_ibm_chip_id(np); + of_node_put(np); + + if (chip_id_lookup_table) + chip_id_lookup_table[idx] = ret; + } - of_node_put(np); - return of_get_ibm_chip_id(np); + return ret; } EXPORT_SYMBOL(cpu_to_chip_id); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ccf77b985c8f..41ed7e33d897 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2983,7 +2983,7 @@ static void __init fixup_device_tree_efika_add_phy(void) " 0x3 encode-int encode+" " s\" interrupts\" property" " finish-device"); - }; + } /* Check for a PHY device node - if missing then create one and * give it's phandle to the ethernet node */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 6ccffc65ac97..773bcc4ca843 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -111,7 +111,7 @@ static 
unsigned long get_user_msr(struct task_struct *task) return task->thread.regs->msr | task->thread.fpexc_mode; } -static int set_user_msr(struct task_struct *task, unsigned long msr) +static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr) { task->thread.regs->msr &= ~MSR_DEBUGCHANGE; task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; @@ -147,7 +147,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr) * We prevent mucking around with the reserved area of trap * which are used internally by the kernel. */ -static int set_user_trap(struct task_struct *task, unsigned long trap) +static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap) { set_trap(task->thread.regs, trap); return 0; @@ -221,17 +221,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_PPC64 struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); #endif - int i; - if (target->thread.regs == NULL) return -EIO; - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. 
Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); membuf_store(&to_msr, get_user_msr(target)); @@ -252,8 +244,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, target->thread.regs, 0, PT_MSR * sizeof(reg)); @@ -659,6 +649,9 @@ int gpr32_set_common(struct task_struct *target, const compat_ulong_t __user *u = ubuf; compat_ulong_t reg; + if (!kbuf && !user_read_access_begin(u, count)) + return -EFAULT; + pos /= sizeof(reg); count /= sizeof(reg); @@ -667,8 +660,7 @@ int gpr32_set_common(struct task_struct *target, regs[pos++] = *k++; else for (; count > 0 && pos < PT_MSR; --count) { - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); regs[pos++] = reg; } @@ -676,8 +668,8 @@ int gpr32_set_common(struct task_struct *target, if (count > 0 && pos == PT_MSR) { if (kbuf) reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; + else + unsafe_get_user(reg, u++, Efault); set_user_msr(target, reg); ++pos; --count; @@ -690,24 +682,24 @@ int gpr32_set_common(struct task_struct *target, ++k; } else { for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); regs[pos++] = reg; } for (; count > 0 && pos < PT_TRAP; --count, ++pos) - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); } if (count > 0 && pos == PT_TRAP) { if (kbuf) reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; + else + unsafe_get_user(reg, u++, Efault); set_user_trap(target, reg); ++pos; --count; } + if (!kbuf) + user_read_access_end(); kbuf = k; ubuf = u; @@ -715,25 +707,19 @@ int gpr32_set_common(struct task_struct *target, count *= sizeof(reg); return 
user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, (PT_TRAP + 1) * sizeof(reg), -1); + +Efault: + user_read_access_end(); + return -EFAULT; } static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - int i; - if (target->thread.regs == NULL) return -EIO; - if (!FULL_REGS(target->thread.regs)) { - /* - * We have a partial register set. - * Fill 14-31 with bogus values. - */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } return gpr32_get_common(target, regset, to, &target->thread.regs->gpr[0]); } @@ -746,7 +732,6 @@ static int gpr32_set(struct task_struct *target, if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, &target->thread.regs->gpr[0]); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 4f3d4ff3728c..0a0a33eb0d28 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -59,7 +59,6 @@ long arch_ptrace(struct task_struct *child, long request, if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) ret = ptrace_get_reg(child, (int) index, &tmp); else @@ -81,7 +80,6 @@ long arch_ptrace(struct task_struct *child, long request, if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) ret = ptrace_put_reg(child, index, data); else @@ -354,8 +352,6 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, nip)); BUILD_BUG_ON(offsetof(struct pt_regs, msr) != offsetof(struct user_pt_regs, msr)); - BUILD_BUG_ON(offsetof(struct pt_regs, msr) != - offsetof(struct user_pt_regs, msr)); BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != offsetof(struct user_pt_regs, orig_gpr3)); BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c 
b/arch/powerpc/kernel/ptrace/ptrace32.c index d30b9ad70edc..19c224808982 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -83,7 +83,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || (index > PT_FPSCR32)) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { ret = ptrace_get_reg(child, index, &tmp); if (ret) @@ -133,7 +132,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || numReg > PT_FPSCR) break; - CHECK_FULL_REGS(child->thread.regs); if (numReg >= PT_FPR0) { flush_fp_to_thread(child); /* get 64 bit FPR */ @@ -187,7 +185,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || (index > PT_FPSCR32)) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { ret = ptrace_put_reg(child, index, data); } else { @@ -226,7 +223,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, */ if ((addr & 3) || (numReg > PT_FPSCR)) break; - CHECK_FULL_REGS(child->thread.regs); if (numReg < PT_FPR0) { unsigned long freg; ret = ptrace_get_reg(child, numReg, &freg); diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 2d33f342a293..6857a5b0a1c3 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -755,11 +755,18 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) return 0; } -#define RMO_READ_BUF_MAX 30 - -/* RTAS Userspace access */ +/** + * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * + * Base + size description of a range of RTAS-addressable memory set + * aside for user space to use as work area(s) for certain RTAS + * functions. User space accesses this region via /dev/mem. 
Apart from + * security policies, the kernel does not arbitrate or serialize + * access to this region, and user space must ensure that concurrent + * users do not interfere with each other. + */ static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v) { - seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX); + seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d126d71ea5bd..6bada744402b 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -828,7 +828,6 @@ void rtas_activate_firmware(void) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } -static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; #ifdef CONFIG_PPC_PSERIES /** * rtas_call_reentrant() - Used for reentrant rtas calls @@ -988,10 +987,10 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { static bool in_rmo_buf(u32 base, u32 end) { return base >= rtas_rmo_buf && - base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) && base <= end && end >= rtas_rmo_buf && - end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); + end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } static bool block_rtas_call(int token, int nargs, @@ -1052,6 +1051,14 @@ err: return true; } +static void __init rtas_syscall_filter_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) + rtas_filters[i].token = rtas_token(rtas_filters[i].name); +} + #else static bool block_rtas_call(int token, int nargs, @@ -1060,6 +1067,10 @@ static bool block_rtas_call(int token, int nargs, return false; } +static void __init rtas_syscall_filter_init(void) +{ +} + #endif /* CONFIG_PPC_RTAS_FILTER */ /* We assume to be passed big endian arguments */ @@ -1103,7 +1114,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return -EINVAL; /* Need to handle ibm,suspend_me call specially */ - if (token == ibm_suspend_me_token) { + if (token == 
rtas_token("ibm,suspend-me")) { /* * rtas_ibm_suspend_me assumes the streamid handle is in cpu @@ -1163,9 +1174,6 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; -#ifdef CONFIG_PPC_RTAS_FILTER - int i; -#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1191,12 +1199,10 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); - ibm_suspend_me_token = rtas_token("ibm,suspend-me"); - } #endif - rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE, + rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE, 0, rtas_region); if (!rtas_rmo_buf) panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", @@ -1206,11 +1212,7 @@ void __init rtas_initialize(void) rtas_last_error_token = rtas_token("rtas-last-error"); #endif -#ifdef CONFIG_PPC_RTAS_FILTER - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { - rtas_filters[i].token = rtas_token(rtas_filters[i].name); - } -#endif + rtas_syscall_filter_init(); } int __init early_init_dt_scan_rtas(unsigned long node, diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e4e1a94ccf6a..0fdfcdd9d880 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -7,6 +7,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/device.h> +#include <linux/memblock.h> #include <linux/nospec.h> #include <linux/prctl.h> #include <linux/seq_buf.h> @@ -18,6 +19,7 @@ #include <asm/setup.h> #include <asm/inst.h> +#include "setup.h" u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; @@ -250,7 +252,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c static enum stf_barrier_type 
stf_enabled_flush_types; static bool no_stf_barrier; -bool stf_barrier; +static bool stf_barrier; static int __init handle_no_stf_barrier(char *p) { @@ -541,6 +543,178 @@ void setup_count_cache_flush(void) toggle_branch_cache_flush(enable); } +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush; +bool rfi_flush; +static bool entry_flush; +static bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); + +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. 
+ */ +} + +void rfi_flush_enable(bool enable) +{ + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + +static void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + +static void __ref init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + /* Only allocate the fallback flush area once (at boot time). */ + if (l1d_flush_fallback_area) + return; + + l1d_size = ppc64_caches.l1d.size; + + /* + * If there is no d-cache-size property in the device tree, l1d_size + * could be zero. That leads to the loop in the asm wrapping around to + * 2^64-1, and then walking off the end of the fallback area and + * eventually causing a page fault which is fatal. Just default to + * something vaguely sane. + */ + if (!l1d_size) + l1d_size = (64 * 1024); + + limit = min(ppc64_bolted_size(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. 
+ */ + l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, + l1d_size, MEMBLOCK_LOW_LIMIT, + limit, NUMA_NO_NODE); + if (!l1d_flush_fallback_area) + panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n", + __func__, l1d_size * 2, l1d_size, &limit); + + + for_each_possible_cpu(cpu) { + struct paca_struct *paca = paca_ptrs[cpu]; + paca->rfi_flush_fallback_area = l1d_flush_fallback_area; + paca->l1d_flush_size = l1d_size; + } +} + +void setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: fallback displacement flush available\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: ori type flush available\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: mttrig type flush available\n"); + + enabled_flush_types = types; + + if (!cpu_mitigations_off() && !no_rfi_flush) + rfi_flush_enable(enable); +} + +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int count_cache_flush_set(void *data, u64 val) { @@ -579,5 +753,92 @@ static __init int count_cache_flush_debugfs_init(void) return 0; } device_initcall(count_cache_flush_debugfs_init); + +static int rfi_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != rfi_flush) + rfi_flush_enable(enable); + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 
1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index bee984b1887b..74a98fff2c2f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -69,7 +69,6 @@ #include "setup.h" #ifdef DEBUG -#include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) 
@@ -829,7 +828,7 @@ static __init void print_system_info(void) } #ifdef CONFIG_SMP -static void smp_setup_pacas(void) +static void __init smp_setup_pacas(void) { int cpu; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 8ba49a6bf515..d7c1f92152af 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -164,7 +164,7 @@ void __init irqstack_early_init(void) } #ifdef CONFIG_VMAP_STACK -void *emergency_ctx[NR_CPUS] __ro_after_init; +void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack}; void __init emergency_stack_init(void) { diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 560ed8b975e7..b779d25761cf 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -232,10 +232,23 @@ static void cpu_ready_for_interrupts(void) * If we are not in hypervisor mode the job is done once for * the whole partition in configure_exceptions(). */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + unsigned long new_lpcr = lpcr; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* P10 DD1 does not have HAIL */ + if (pvr_version_is(PVR_POWER10) && + (mfspr(SPRN_PVR) & 0xf00) == 0x100) + new_lpcr |= LPCR_AIL_3; + else + new_lpcr |= LPCR_HAIL; + } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + new_lpcr |= LPCR_AIL_3; + } + + if (new_lpcr != lpcr) + mtspr(SPRN_LPCR, new_lpcr); } /* @@ -941,266 +954,3 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); - -#ifdef CONFIG_PPC_BOOK3S_64 -static enum l1d_flush_type enabled_flush_types; -static void *l1d_flush_fallback_area; -static bool no_rfi_flush; -static bool no_entry_flush; -static bool no_uaccess_flush; -bool rfi_flush; -bool entry_flush; -bool uaccess_flush; -DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); 
-EXPORT_SYMBOL(uaccess_flush_key); - -static int __init handle_no_rfi_flush(char *p) -{ - pr_info("rfi-flush: disabled on command line."); - no_rfi_flush = true; - return 0; -} -early_param("no_rfi_flush", handle_no_rfi_flush); - -static int __init handle_no_entry_flush(char *p) -{ - pr_info("entry-flush: disabled on command line."); - no_entry_flush = true; - return 0; -} -early_param("no_entry_flush", handle_no_entry_flush); - -static int __init handle_no_uaccess_flush(char *p) -{ - pr_info("uaccess-flush: disabled on command line."); - no_uaccess_flush = true; - return 0; -} -early_param("no_uaccess_flush", handle_no_uaccess_flush); - -/* - * The RFI flush is not KPTI, but because users will see doco that says to use - * nopti we hijack that option here to also disable the RFI flush. - */ -static int __init handle_no_pti(char *p) -{ - pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); - handle_no_rfi_flush(NULL); - return 0; -} -early_param("nopti", handle_no_pti); - -static void do_nothing(void *unused) -{ - /* - * We don't need to do the flush explicitly, just enter+exit kernel is - * sufficient, the RFI exit handlers will do the right thing. 
- */ -} - -void rfi_flush_enable(bool enable) -{ - if (enable) { - do_rfi_flush_fixups(enabled_flush_types); - on_each_cpu(do_nothing, NULL, 1); - } else - do_rfi_flush_fixups(L1D_FLUSH_NONE); - - rfi_flush = enable; -} - -static void entry_flush_enable(bool enable) -{ - if (enable) { - do_entry_flush_fixups(enabled_flush_types); - on_each_cpu(do_nothing, NULL, 1); - } else { - do_entry_flush_fixups(L1D_FLUSH_NONE); - } - - entry_flush = enable; -} - -static void uaccess_flush_enable(bool enable) -{ - if (enable) { - do_uaccess_flush_fixups(enabled_flush_types); - static_branch_enable(&uaccess_flush_key); - on_each_cpu(do_nothing, NULL, 1); - } else { - static_branch_disable(&uaccess_flush_key); - do_uaccess_flush_fixups(L1D_FLUSH_NONE); - } - - uaccess_flush = enable; -} - -static void __ref init_fallback_flush(void) -{ - u64 l1d_size, limit; - int cpu; - - /* Only allocate the fallback flush area once (at boot time). */ - if (l1d_flush_fallback_area) - return; - - l1d_size = ppc64_caches.l1d.size; - - /* - * If there is no d-cache-size property in the device tree, l1d_size - * could be zero. That leads to the loop in the asm wrapping around to - * 2^64-1, and then walking off the end of the fallback area and - * eventually causing a page fault which is fatal. Just default to - * something vaguely sane. - */ - if (!l1d_size) - l1d_size = (64 * 1024); - - limit = min(ppc64_bolted_size(), ppc64_rma_size); - - /* - * Align to L1d size, and size it at 2x L1d size, to catch possible - * hardware prefetch runoff. We don't have a recipe for load patterns to - * reliably avoid the prefetcher. 
- */ - l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, - l1d_size, MEMBLOCK_LOW_LIMIT, - limit, NUMA_NO_NODE); - if (!l1d_flush_fallback_area) - panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n", - __func__, l1d_size * 2, l1d_size, &limit); - - - for_each_possible_cpu(cpu) { - struct paca_struct *paca = paca_ptrs[cpu]; - paca->rfi_flush_fallback_area = l1d_flush_fallback_area; - paca->l1d_flush_size = l1d_size; - } -} - -void setup_rfi_flush(enum l1d_flush_type types, bool enable) -{ - if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: fallback displacement flush available\n"); - init_fallback_flush(); - } - - if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: ori type flush available\n"); - - if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: mttrig type flush available\n"); - - enabled_flush_types = types; - - if (!cpu_mitigations_off() && !no_rfi_flush) - rfi_flush_enable(enable); -} - -void setup_entry_flush(bool enable) -{ - if (cpu_mitigations_off()) - return; - - if (!no_entry_flush) - entry_flush_enable(enable); -} - -void setup_uaccess_flush(bool enable) -{ - if (cpu_mitigations_off()) - return; - - if (!no_uaccess_flush) - uaccess_flush_enable(enable); -} - -#ifdef CONFIG_DEBUG_FS -static int rfi_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != rfi_flush) - rfi_flush_enable(enable); - - return 0; -} - -static int rfi_flush_get(void *data, u64 *val) -{ - *val = rfi_flush ? 
1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); - -static int entry_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != entry_flush) - entry_flush_enable(enable); - - return 0; -} - -static int entry_flush_get(void *data, u64 *val) -{ - *val = entry_flush ? 1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); - -static int uaccess_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != uaccess_flush) - uaccess_flush_enable(enable); - - return 0; -} - -static int uaccess_flush_get(void *data, u64 *val) -{ - *val = uaccess_flush ? 1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); - -static __init int rfi_flush_debugfs_init(void) -{ - debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); - debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); - debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); - return 0; -} -device_initcall(rfi_flush_debugfs_init); -#endif -#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 2559a681536e..f4aafa337c2e 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -19,6 +19,15 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); +static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src) +{ + BUILD_BUG_ON(sizeof(sigset_t) 
!= sizeof(u64)); + + return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]); +} +#define unsafe_get_user_sigset(dst, src, label) \ + unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label) + #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); @@ -53,6 +62,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); &buf[i], label);\ } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1; i++) \ + unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \ + unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_vsx_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label); \ +} while (0) + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM #define unsafe_copy_ckfpr_to_user(to, task, label) do { \ struct task_struct *__t = task; \ @@ -73,6 +102,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ &buf[i], label);\ } while (0) + +#define unsafe_copy_ckfpr_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\ + unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], failed); \ +} while (0) + +#define unsafe_copy_ckvsx_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + 
unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label); \ +} while (0) #endif #elif defined(CONFIG_PPC_FPU_REGS) @@ -80,6 +129,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \ ELF_NFPREG * sizeof(double), label) +#define unsafe_copy_fpr_from_user(task, from, label) \ + unsafe_copy_from_user((task)->thread.fp_state.fpr, from, \ + ELF_NFPREG * sizeof(double), label) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { @@ -115,6 +168,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) #else #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index f651b992fe01..8f05ed0da292 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -83,24 +83,17 @@ * implementation that makes things simple for little endian only) */ #define unsafe_put_sigset_t unsafe_put_compat_sigset - -static inline int get_sigset_t(sigset_t *set, - const compat_sigset_t __user *uset) -{ - return get_compat_sigset(set, uset); -} +#define unsafe_get_sigset_t unsafe_get_compat_sigset #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) static __always_inline int -save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) +__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; - WARN_ON(!FULL_REGS(regs)); - for (i = 0; i <= PT_RESULT; i ++) { /* Force usr to alway see softe as 1 (interrupts enabled) */ if (i == PT_SOFTE) @@ -116,8 +109,8 @@ failed: return 1; } -static inline int restore_general_regs(struct pt_regs *regs, - 
struct mcontext __user *sr) +static __always_inline int +__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; @@ -125,10 +118,12 @@ static inline int restore_general_regs(struct pt_regs *regs, for (i = 0; i <= PT_RESULT; i++) { if ((i == PT_MSR) || (i == PT_SOFTE)) continue; - if (__get_user(gregs[i], &sr->mc_gregs[i])) - return -EFAULT; + unsafe_get_user(gregs[i], &sr->mc_gregs[i], failed); } return 0; + +failed: + return 1; } #else /* CONFIG_PPC64 */ @@ -142,18 +137,14 @@ static inline int restore_general_regs(struct pt_regs *regs, unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \ } while (0) -static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) -{ - return copy_from_user(set, uset, sizeof(*uset)); -} +#define unsafe_get_sigset_t unsafe_get_user_sigset #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) static __always_inline int -save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) +__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { - WARN_ON(!FULL_REGS(regs)); unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); return 0; @@ -161,23 +152,30 @@ failed: return 1; } -static inline int restore_general_regs(struct pt_regs *regs, - struct mcontext __user *sr) +static __always_inline +int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) { /* copy up to but not including MSR */ - if (__copy_from_user(regs, &sr->mc_gregs, - PT_MSR * sizeof(elf_greg_t))) - return -EFAULT; + unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed); + /* copy from orig_r3 (the word after the MSR) up to the end */ - if (__copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], - GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t))) - return -EFAULT; + unsafe_copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], + GP_REGS_SIZE 
- PT_ORIG_R3 * sizeof(elf_greg_t), failed); + return 0; + +failed: + return 1; } #endif #define unsafe_save_general_regs(regs, frame, label) do { \ - if (save_general_regs_unsafe(regs, frame)) \ + if (__unsafe_save_general_regs(regs, frame)) \ + goto label; \ +} while (0) + +#define unsafe_restore_general_regs(regs, frame, label) do { \ + if (__unsafe_restore_general_regs(regs, frame)) \ goto label; \ } while (0) @@ -260,8 +258,8 @@ static void prepare_save_user_regs(int ctx_has_vsx_region) #endif } -static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int ctx_has_vsx_region) +static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -338,7 +336,7 @@ failed: } #define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \ - if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx)) \ + if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx)) \ goto label; \ } while (0) @@ -350,7 +348,7 @@ failed: * We also save the transactional registers to a second ucontext in the * frame. * - * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts(). + * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts(). */ static void prepare_save_tm_user_regs(void) { @@ -441,7 +439,7 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in save_user_regs_unsafe(). + * simply the same as in __unsafe_save_user_regs(). 
*/ if (current->thread.used_spe) { unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, @@ -485,26 +483,25 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user static long restore_user_regs(struct pt_regs *regs, struct mcontext __user *sr, int sig) { - long err; unsigned int save_r2 = 0; unsigned long msr; #ifdef CONFIG_VSX int i; #endif + if (!user_read_access_begin(sr, sizeof(*sr))) + return 1; /* * restore general registers but not including MSR or SOFTE. Also * take care of keeping r2 (TLS) intact if not a signal */ if (!sig) save_r2 = (unsigned int)regs->gpr[2]; - err = restore_general_regs(regs, sr); + unsafe_restore_general_regs(regs, sr, failed); set_trap_norestart(regs); - err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); + unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); if (!sig) regs->gpr[2] = (unsigned long) save_r2; - if (err) - return 1; /* if doing signal return, restore the previous little-endian mode */ if (sig) @@ -518,22 +515,19 @@ static long restore_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs, - sizeof(sr->mc_vregs))) - return 1; + unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs, + sizeof(sr->mc_vregs), failed); current->thread.used_vr = true; } else if (current->thread.used_vr) memset(&current->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128)); /* Always get VRSAVE back */ - if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) - return 1; + unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.vrsave); #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_from_user(current, &sr->mc_fregs)) - return 1; + unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); #ifdef CONFIG_VSX /* @@ -546,8 +540,7 @@ static long restore_user_regs(struct pt_regs *regs, * 
Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &sr->mc_vsregs)) - return 1; + unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed); current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) @@ -565,19 +558,22 @@ static long restore_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_SPE; if (msr & MSR_SPE) { /* restore spe registers from the stack */ - if (__copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); /* Always get SPEFSCR back */ - if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ + user_read_access_end(); return 0; + +failed: + user_read_access_end(); + return 1; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -590,7 +586,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, struct mcontext __user *tm_sr) { - long err; unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; @@ -605,15 +600,13 @@ static long restore_tm_user_regs(struct pt_regs *regs, * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR * were set by the signal delivery. 
*/ - err = restore_general_regs(regs, tm_sr); - err |= restore_general_regs(&current->thread.ckpt_regs, sr); - - err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]); - - err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); - if (err) + if (!user_read_access_begin(sr, sizeof(*sr))) return 1; + unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed); + unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed); + unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); + /* Restore the previous little-endian mode */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); @@ -621,12 +614,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs, - sizeof(sr->mc_vregs)) || - __copy_from_user(&current->thread.vr_state, - &tm_sr->mc_vregs, - sizeof(sr->mc_vregs))) - return 1; + unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs, + sizeof(sr->mc_vregs), failed); current->thread.used_vr = true; } else if (current->thread.used_vr) { memset(&current->thread.vr_state, 0, @@ -636,20 +625,15 @@ static long restore_tm_user_regs(struct pt_regs *regs, } /* Always get VRSAVE back */ - if (__get_user(current->thread.ckvrsave, - (u32 __user *)&sr->mc_vregs[32]) || - __get_user(current->thread.vrsave, - (u32 __user *)&tm_sr->mc_vregs[32])) - return 1; + unsafe_get_user(current->thread.ckvrsave, + (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); #endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); - if (copy_fpr_from_user(current, &sr->mc_fregs) || - copy_ckfpr_from_user(current, &tm_sr->mc_fregs)) - return 1; + unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); #ifdef CONFIG_VSX regs->msr &= ~MSR_VSX; @@ -658,9 +642,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a 
local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) || - copy_ckvsx_from_user(current, &sr->mc_vsregs)) - return 1; + unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed); current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) { @@ -675,23 +657,54 @@ static long restore_tm_user_regs(struct pt_regs *regs, */ regs->msr &= ~MSR_SPE; if (msr & MSR_SPE) { - if (__copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); /* Always get SPEFSCR back */ - if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs - + ELF_NEVRREG)) - return 1; + unsafe_get_user(current->thread.spefscr, + (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - /* Get the top half of the MSR from the user context */ - if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + user_read_access_end(); + + if (!user_read_access_begin(tm_sr, sizeof(*tm_sr))) return 1; + + unsafe_restore_general_regs(regs, tm_sr, failed); + +#ifdef CONFIG_ALTIVEC + /* restore altivec registers from the stack */ + if (msr & MSR_VEC) + unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs, + sizeof(sr->mc_vregs), failed); + + /* Always get VRSAVE back */ + unsafe_get_user(current->thread.vrsave, + (u32 __user *)&tm_sr->mc_vregs[32], failed); +#endif /* CONFIG_ALTIVEC */ + + unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed); + +#ifdef CONFIG_VSX + if (msr & MSR_VSX) { + /* + * Restore altivec registers from the stack to a local + * buffer, then write this out to the thread_struct + */ + unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed); + current->thread.used_vsr = true; + } +#endif /* CONFIG_VSX */ 
+ + /* Get the top half of the MSR from the user context */ + unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed); msr_hi <<= 32; + + user_read_access_end(); + /* If TM bits are set to the reserved value, it's an invalid context */ if (MSR_TM_RESV(msr_hi)) return 1; @@ -739,6 +752,16 @@ static long restore_tm_user_regs(struct pt_regs *regs, preempt_enable(); return 0; + +failed: + user_read_access_end(); + return 1; +} +#else +static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, + struct mcontext __user *tm_sr) +{ + return 0; } #endif @@ -944,28 +967,31 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sigset_t set; struct mcontext __user *mcp; - if (get_sigset_t(&set, &ucp->uc_sigmask)) + if (!user_read_access_begin(ucp, sizeof(*ucp))) return -EFAULT; + + unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed); #ifdef CONFIG_PPC64 { u32 cmcp; - if (__get_user(cmcp, &ucp->uc_regs)) - return -EFAULT; + unsafe_get_user(cmcp, &ucp->uc_regs, failed); mcp = (struct mcontext __user *)(u64)cmcp; - /* no need to check access_ok(mcp), since mcp < 4GB */ } #else - if (__get_user(mcp, &ucp->uc_regs)) - return -EFAULT; - if (!access_ok(mcp, sizeof(*mcp))) - return -EFAULT; + unsafe_get_user(mcp, &ucp->uc_regs, failed); #endif + user_read_access_end(); + set_current_blocked(&set); if (restore_user_regs(regs, mcp, sig)) return -EFAULT; return 0; + +failed: + user_read_access_end(); + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -979,11 +1005,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp, u32 cmcp; u32 tm_cmcp; - if (get_sigset_t(&set, &ucp->uc_sigmask)) + if (!user_read_access_begin(ucp, sizeof(*ucp))) return -EFAULT; - if (__get_user(cmcp, &ucp->uc_regs) || - __get_user(tm_cmcp, &tm_ucp->uc_regs)) + unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed); + unsafe_get_user(cmcp, &ucp->uc_regs, failed); + + user_read_access_end(); + + if (__get_user(tm_cmcp, &tm_ucp->uc_regs)) return 
-EFAULT; mcp = (struct mcontext __user *)(u64)cmcp; tm_mcp = (struct mcontext __user *)(u64)tm_cmcp; @@ -994,6 +1024,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp, return -EFAULT; return 0; + +failed: + user_read_access_end(); + return -EFAULT; } #endif @@ -1311,19 +1345,16 @@ SYSCALL_DEFINE0(sigreturn) struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; - void __user *addr; sigset_t set; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - struct mcontext __user *mcp, *tm_mcp; - unsigned long msr_hi; -#endif + struct mcontext __user *mcp; + struct mcontext __user *tm_mcp = NULL; + unsigned long long msr_hi = 0; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); sc = &sf->sctx; - addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1339,31 +1370,32 @@ SYSCALL_DEFINE0(sigreturn) #endif set_current_blocked(&set); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM mcp = (struct mcontext __user *)&sf->mctx; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mcp = (struct mcontext __user *)&sf->mctx_transact; if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; +#endif if (MSR_TM_ACTIVE(msr_hi<<32)) { if (!cpu_has_feature(CPU_FTR_TM)) goto badframe; if (restore_tm_user_regs(regs, mcp, tm_mcp)) goto badframe; - } else -#endif - { + } else { sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr = sr; - if (!access_ok(sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) - goto badframe; + if (restore_user_regs(regs, sr, 1)) { + signal_fault(current, regs, "sys_sigreturn", sr); + + force_sig(SIGSEGV); + return 0; + } } set_thread_flag(TIF_RESTOREALL); return 0; badframe: - signal_fault(current, regs, "sys_sigreturn", addr); + signal_fault(current, regs, "sys_sigreturn", sc); force_sig(SIGSEGV); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c 
index f9e4a1ac440f..dca66481d0c2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -79,13 +79,36 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc) } #endif +static void prepare_setup_sigcontext(struct task_struct *tsk) +{ +#ifdef CONFIG_ALTIVEC + /* save altivec registers */ + if (tsk->thread.used_vr) + flush_altivec_to_thread(tsk); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + tsk->thread.vrsave = mfspr(SPRN_VRSAVE); +#endif /* CONFIG_ALTIVEC */ + + flush_fp_to_thread(tsk); + +#ifdef CONFIG_VSX + if (tsk->thread.used_vsr) + flush_vsx_to_thread(tsk); +#endif /* CONFIG_VSX */ +} + /* * Set up the sigcontext for the signal frame. */ -static long setup_sigcontext(struct sigcontext __user *sc, - struct task_struct *tsk, int signr, sigset_t *set, - unsigned long handler, int ctx_has_vsx_region) +#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\ +do { \ + if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\ + goto label; \ +} while (0) +static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc, + struct task_struct *tsk, int signr, sigset_t *set, + unsigned long handler, int ctx_has_vsx_region) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -97,25 +120,22 @@ static long setup_sigcontext(struct sigcontext __user *sc, */ #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); - unsigned long vrsave; #endif struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; - long err = 0; /* Force usr to alway see softe as 1 (interrupts enabled) */ unsigned long softe = 0x1; BUG_ON(tsk != current); #ifdef CONFIG_ALTIVEC - err |= __put_user(v_regs, &sc->v_regs); + unsafe_put_user(v_regs, &sc->v_regs, efault_out); /* save altivec registers */ if (tsk->thread.used_vr) { - flush_altivec_to_thread(tsk); /* Copy 33 vec 
registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, &tsk->thread.vr_state, - 33 * sizeof(vector128)); + unsafe_copy_to_user(v_regs, &tsk->thread.vr_state, + 33 * sizeof(vector128), efault_out); /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) * contains valid data. */ @@ -124,19 +144,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. */ - vrsave = 0; - if (cpu_has_feature(CPU_FTR_ALTIVEC)) { - vrsave = mfspr(SPRN_VRSAVE); - tsk->thread.vrsave = vrsave; - } - - err |= __put_user(vrsave, (u32 __user *)&v_regs[33]); + unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out); #else /* CONFIG_ALTIVEC */ - err |= __put_user(0, &sc->v_regs); + unsafe_put_user(0, &sc->v_regs, efault_out); #endif /* CONFIG_ALTIVEC */ - flush_fp_to_thread(tsk); /* copy fpr regs and fpscr */ - err |= copy_fpr_to_user(&sc->fp_regs, tsk); + unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -150,26 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc, * VMX data. */ if (tsk->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(tsk); v_regs += ELF_NVRREG; - err |= copy_vsx_to_user(v_regs, tsk); + unsafe_copy_vsx_to_user(v_regs, tsk, efault_out); /* set MSR_VSX in the MSR value in the frame to * indicate that sc->vs_reg) contains valid data. 
*/ msr |= MSR_VSX; } #endif /* CONFIG_VSX */ - err |= __put_user(&sc->gp_regs, &sc->regs); - WARN_ON(!FULL_REGS(regs)); - err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); - err |= __put_user(msr, &sc->gp_regs[PT_MSR]); - err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]); - err |= __put_user(signr, &sc->signal); - err |= __put_user(handler, &sc->handler); + unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out); + unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out); + unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out); + unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out); + unsafe_put_user(signr, &sc->signal, efault_out); + unsafe_put_user(handler, &sc->handler, efault_out); if (set != NULL) - err |= __put_user(set->sig[0], &sc->oldmask); + unsafe_put_user(set->sig[0], &sc->oldmask, efault_out); - return err; + return 0; + +efault_out: + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -294,7 +308,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, err |= __put_user(&sc->gp_regs, &sc->regs); err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs); - WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->gp_regs, &tsk->thread.ckpt_regs, GP_REGS_SIZE); @@ -312,14 +325,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, /* * Restore the sigcontext from the signal frame. 
*/ - -static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, - struct sigcontext __user *sc) +#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do { \ + if (__unsafe_restore_sigcontext(tsk, set, sig, sc)) \ + goto label; \ +} while (0) +static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set, + int sig, struct sigcontext __user *sc) { #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs; #endif - unsigned long err = 0; unsigned long save_r13 = 0; unsigned long msr; struct pt_regs *regs = tsk->thread.regs; @@ -334,27 +349,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, save_r13 = regs->gpr[13]; /* copy the GPRs */ - err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr)); - err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]); + unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out); + unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out); /* get MSR separately, transfer the LE bit if doing signal return */ - err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out); if (sig) regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]); - err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]); - err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); - err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); - err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); + unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out); + unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out); + unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out); + unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out); + unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out); /* Don't allow userspace to set SOFTE */ set_trap_norestart(regs); - err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); - err |= __get_user(regs->dsisr, 
&sc->gp_regs[PT_DSISR]); - err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); + unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out); + unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out); + unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out); if (!sig) regs->gpr[13] = save_r13; if (set != NULL) - err |= __get_user(set->sig[0], &sc->oldmask); + unsafe_get_user(set->sig[0], &sc->oldmask, efault_out); /* * Force reload of FP/VEC. @@ -364,29 +379,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); #ifdef CONFIG_ALTIVEC - err |= __get_user(v_regs, &sc->v_regs); - if (err) - return err; + unsafe_get_user(v_regs, &sc->v_regs, efault_out); if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128))) return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ if (v_regs != NULL && (msr & MSR_VEC) != 0) { - err |= __copy_from_user(&tsk->thread.vr_state, v_regs, - 33 * sizeof(vector128)); + unsafe_copy_from_user(&tsk->thread.vr_state, v_regs, + 33 * sizeof(vector128), efault_out); tsk->thread.used_vr = true; } else if (tsk->thread.used_vr) { memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128)); } /* Always get VRSAVE back */ if (v_regs != NULL) - err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); + unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out); else tsk->thread.vrsave = 0; if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, tsk->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ - err |= copy_fpr_from_user(tsk, &sc->fp_regs); + unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out); #ifdef CONFIG_VSX /* * Get additional VSX data. 
Update v_regs to point after the @@ -395,14 +408,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, */ v_regs += ELF_NVRREG; if ((msr & MSR_VSX) != 0) { - err |= copy_vsx_from_user(tsk, v_regs); + unsafe_copy_vsx_from_user(tsk, v_regs, efault_out); tsk->thread.used_vsr = true; } else { for (i = 0; i < 32 ; i++) tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } #endif - return err; + return 0; + +efault_out: + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -586,6 +602,12 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, return err; } +#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ +static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc, + struct sigcontext __user *tm_sc) +{ + return -EINVAL; +} #endif /* @@ -655,12 +677,16 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, ctx_has_vsx_region = 1; if (old_ctx != NULL) { - if (!access_ok(old_ctx, ctx_size) - || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0, - ctx_has_vsx_region) - || __copy_to_user(&old_ctx->uc_sigmask, - ¤t->blocked, sizeof(sigset_t))) + prepare_setup_sigcontext(current); + if (!user_write_access_begin(old_ctx, ctx_size)) return -EFAULT; + + unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, + 0, ctx_has_vsx_region, efault_out); + unsafe_copy_to_user(&old_ctx->uc_sigmask, ¤t->blocked, + sizeof(sigset_t), efault_out); + + user_write_access_end(); } if (new_ctx == NULL) return 0; @@ -680,15 +706,25 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. 
*/ - if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set))) + if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) do_exit(SIGSEGV); set_current_blocked(&set); - if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) + + if (!user_read_access_begin(new_ctx, ctx_size)) + return -EFAULT; + if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { + user_read_access_end(); do_exit(SIGSEGV); + } + user_read_access_end(); /* This returns like rt_sigreturn */ set_thread_flag(TIF_RESTOREALL); return 0; + +efault_out: + user_write_access_end(); + return -EFAULT; } @@ -701,9 +737,7 @@ SYSCALL_DEFINE0(rt_sigreturn) struct pt_regs *regs = current_pt_regs(); struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; sigset_t set; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM unsigned long msr; -#endif /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -711,52 +745,54 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(uc, sizeof(*uc))) goto badframe; - if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + if (__get_user_sigset(&set, &uc->uc_sigmask)) goto badframe; set_current_blocked(&set); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* - * If there is a transactional state then throw it away. - * The purpose of a sigreturn is to destroy all traces of the - * signal frame, this includes any transactional state created - * within in. We only check for suspended as we can never be - * active in the kernel, we are active, there is nothing better to - * do than go ahead and Bad Thing later. - * The cause is not important as there will never be a - * recheckpoint so it's not user visible. - */ - if (MSR_TM_SUSPENDED(mfmsr())) - tm_reclaim_current(0); + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) { + /* + * If there is a transactional state then throw it away. 
+ * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); - /* - * Disable MSR[TS] bit also, so, if there is an exception in the - * code below (as a page fault in copy_ckvsx_to_user()), it does - * not recheckpoint this task if there was a context switch inside - * the exception. - * - * A major page fault can indirectly call schedule(). A reschedule - * process in the middle of an exception can have a side effect - * (Changing the CPU MSR[TS] state), since schedule() is called - * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended - * (switch_to() calls tm_recheckpoint() for the 'new' process). In - * this case, the process continues to be the same in the CPU, but - * the CPU state just changed. - * - * This can cause a TM Bad Thing, since the MSR in the stack will - * have the MSR[TS]=0, and this is what will be used to RFID. - * - * Clearing MSR[TS] state here will avoid a recheckpoint if there - * is any process reschedule in kernel space. The MSR[TS] state - * does not need to be saved also, since it will be replaced with - * the MSR[TS] that came from user context later, at - * restore_tm_sigcontexts. - */ - regs->msr &= ~MSR_TS_MASK; + /* + * Disable MSR[TS] bit also, so, if there is an exception in the + * code below (as a page fault in copy_ckvsx_to_user()), it does + * not recheckpoint this task if there was a context switch inside + * the exception. + * + * A major page fault can indirectly call schedule(). 
A reschedule + * process in the middle of an exception can have a side effect + * (Changing the CPU MSR[TS] state), since schedule() is called + * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended + * (switch_to() calls tm_recheckpoint() for the 'new' process). In + * this case, the process continues to be the same in the CPU, but + * the CPU state just changed. + * + * This can cause a TM Bad Thing, since the MSR in the stack will + * have the MSR[TS]=0, and this is what will be used to RFID. + * + * Clearing MSR[TS] state here will avoid a recheckpoint if there + * is any process reschedule in kernel space. The MSR[TS] state + * does not need to be saved also, since it will be replaced with + * the MSR[TS] that came from user context later, at + * restore_tm_sigcontexts. + */ + regs->msr &= ~MSR_TS_MASK; - if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) - goto badframe; - if (MSR_TM_ACTIVE(msr)) { + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) + goto badframe; + } + + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; @@ -769,9 +805,7 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_tm_sigcontexts(current, &uc->uc_mcontext, &uc_transact->uc_mcontext)) goto badframe; - } else -#endif - { + } else { /* * Fall through, for non-TM restore * @@ -785,8 +819,13 @@ SYSCALL_DEFINE0(rt_sigreturn) * causing a TM bad thing. 
*/ current->thread.regs->msr &= ~MSR_TS_MASK; - if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) + if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext))) goto badframe; + + unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext, + badframe_block); + + user_read_access_end(); } if (restore_altstack(&uc->uc_stack)) @@ -795,6 +834,8 @@ SYSCALL_DEFINE0(rt_sigreturn) set_thread_flag(TIF_RESTOREALL); return 0; +badframe_block: + user_read_access_end(); badframe: signal_fault(current, regs, "rt_sigreturn", uc); @@ -809,46 +850,57 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); - if (!access_ok(frame, sizeof(*frame))) - goto badframe; - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - if (err) + /* + * This only applies when calling unsafe_setup_sigcontext() and must be + * called before opening the uaccess window. + */ + if (!MSR_TM_ACTIVE(msr)) + prepare_setup_sigcontext(tsk); + + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; + unsafe_put_user(&frame->info, &frame->pinfo, badframe_block); + unsafe_put_user(&frame->uc, &frame->puc, badframe_block); + /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsafe_put_user(0, &frame->uc.uc_flags, badframe_block); + unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block); + if (MSR_TM_ACTIVE(msr)) { +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. 
*/ - err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); + unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block); + + user_write_access_end(); + err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, msr); - } else + + if (!user_write_access_begin(&frame->uc.uc_sigmask, + sizeof(frame->uc.uc_sigmask))) + goto badframe; + #endif - { - err |= __put_user(0, &frame->uc.uc_link); - err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, + } else { + unsafe_put_user(0, &frame->uc.uc_link, badframe_block); + unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, - 1); + 1, badframe_block); } - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto badframe; + + unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); + user_write_access_end(); /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -863,6 +915,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } + + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Allocate a dummy caller frame for the signal handler. 
*/ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); @@ -902,6 +959,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, return 0; +badframe_block: + user_write_access_end(); badframe: signal_fault(current, regs, "handle_rt_signal64", frame); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5a4d59a1070d..2e05c783440a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -83,7 +83,7 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); -DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); +static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); @@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata; * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); /* * On some big-cores system, thread_group_l2_cache_map for each CPU * corresponds to the set its siblings within the core that share the * L2-cache. */ -DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -1057,17 +1057,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_memory_node(numa_cpu_lookup_table[cpu])); } #endif - /* - * cpu_core_map is now more updated and exists only since - * its been exported for long. It only will have a snapshot - * of cpu_cpu_mask. 
- */ - cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); } /* Init the cpumasks so the boot CPU is related to itself */ cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); if (has_coregroup_support()) cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); @@ -1078,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpu_smallcore_mask(boot_cpuid)); } + if (cpu_to_chip_id(boot_cpuid) != -1) { + int idx = num_possible_cpus() / threads_per_core; + + /* + * All threads of a core will all belong to the same core, + * chip_id_lookup_table will have one entry per core. + * Assumption: if boot_cpuid doesn't have a chip-id, then no + * other CPUs, will also not have chip-id. + */ + chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL); + if (chip_id_lookup_table) + memset(chip_id_lookup_table, -1, sizeof(int) * idx); + } + if (smp_ops && smp_ops->probe) smp_ops->probe(); } @@ -1408,6 +1417,9 @@ static void remove_cpu_from_masks(int cpu) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); } + for_each_cpu(i, cpu_core_mask(cpu)) + set_cpus_unrelated(cpu, i, cpu_core_mask); + if (has_coregroup_support()) { for_each_cpu(i, cpu_coregroup_mask(cpu)) set_cpus_unrelated(cpu, i, cpu_coregroup_mask); @@ -1468,8 +1480,11 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask) static void add_cpu_to_masks(int cpu) { + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; int first_thread = cpu_first_thread_sibling(cpu); cpumask_var_t mask; + int chip_id = -1; + bool ret; int i; /* @@ -1485,12 +1500,39 @@ static void add_cpu_to_masks(int cpu) add_cpu_to_smallcore_masks(cpu); /* In CPU-hotplug path, hence use GFP_ATOMIC */ - alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu)); + ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu)); update_mask_by_l2(cpu, &mask); if (has_coregroup_support()) 
update_coregroup_mask(cpu, &mask); + if (chip_id_lookup_table && ret) + chip_id = cpu_to_chip_id(cpu); + + if (chip_id == -1) { + cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); + goto out; + } + + if (shared_caches) + submask_fn = cpu_l2_cache_mask; + + /* Update core_mask with all the CPUs that are part of submask */ + or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask); + + /* Skip all CPUs already part of current CPU core mask */ + cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu)); + + for_each_cpu(i, mask) { + if (chip_id == cpu_to_chip_id(i)) { + or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask); + cpumask_andnot(mask, mask, submask_fn(i)); + } else { + cpumask_andnot(mask, mask, cpu_core_mask(i)); + } + } + +out: free_cpumask_var(mask); } @@ -1521,6 +1563,9 @@ void start_secondary(void *unused) vdso_getcpu_init(); #endif + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + /* Update topology CPU masks */ add_cpu_to_masks(cpu); @@ -1539,9 +1584,6 @@ void start_secondary(void *unused) shared_caches = true; } - set_numa_node(numa_cpu_lookup_table[cpu]); - set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); - smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index b6440657ef92..1deb1bf331dd 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -23,90 +23,56 @@ #include <asm/paca.h> -/* - * Save stack-backtrace addresses into a stack_trace buffer. 
- */ -static void save_context_stack(struct stack_trace *trace, unsigned long sp, - struct task_struct *tsk, int savesched) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { + unsigned long sp; + + if (regs && !consume_entry(cookie, regs->nip)) + return; + + if (regs) + sp = regs->gpr[1]; + else if (task == current) + sp = current_stack_frame(); + else + sp = task->thread.ksp; + for (;;) { unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD)) return; newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; - if (savesched || !in_sched_functions(ip)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = ip; - else - trace->skip--; - } - - if (trace->nr_entries >= trace->max_entries) + if (!consume_entry(cookie, ip)) return; sp = newsp; } } -void save_stack_trace(struct stack_trace *trace) -{ - unsigned long sp; - - sp = current_stack_frame(); - - save_context_stack(trace, sp, current, 1); -} -EXPORT_SYMBOL_GPL(save_stack_trace); - -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - unsigned long sp; - - if (!try_get_task_stack(tsk)) - return; - - if (tsk == current) - sp = current_stack_frame(); - else - sp = tsk->thread.ksp; - - save_context_stack(trace, sp, tsk, 0); - - put_task_stack(tsk); -} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -void -save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - save_context_stack(trace, regs->gpr[1], current, 0); -} -EXPORT_SYMBOL_GPL(save_stack_trace_regs); - -#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE /* * This function returns an error if it detects any unreliable features of the * stack. Otherwise it guarantees that the stack trace is reliable. * * If the task is not 'current', the caller *must* ensure the task is inactive. 
*/ -static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, - struct stack_trace *trace) +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { unsigned long sp; unsigned long newsp; - unsigned long stack_page = (unsigned long)task_stack_page(tsk); + unsigned long stack_page = (unsigned long)task_stack_page(task); unsigned long stack_end; int graph_idx = 0; bool firstframe; stack_end = stack_page + THREAD_SIZE; - if (!is_idle_task(tsk)) { + if (!is_idle_task(task)) { /* * For user tasks, this is the SP value loaded on * kernel entry, see "PACAKSAVE(r13)" in _switch() and @@ -130,10 +96,10 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, stack_end -= STACK_FRAME_OVERHEAD; } - if (tsk == current) + if (task == current) sp = current_stack_frame(); else - sp = tsk->thread.ksp; + sp = task->thread.ksp; if (sp < stack_page + sizeof(struct thread_struct) || sp > stack_end - STACK_FRAME_MIN_SIZE) { @@ -182,7 +148,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, * FIXME: IMHO these tests do not belong in * arch-dependent code, they are generic. */ - ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack); + ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack); #ifdef CONFIG_KPROBES /* * Mark stacktraces with kretprobed functions on them @@ -192,36 +158,12 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, return -EINVAL; #endif - if (trace->nr_entries >= trace->max_entries) - return -E2BIG; - if (!trace->skip) - trace->entries[trace->nr_entries++] = ip; - else - trace->skip--; + if (!consume_entry(cookie, ip)) + return -EINVAL; } return 0; } -int save_stack_trace_tsk_reliable(struct task_struct *tsk, - struct stack_trace *trace) -{ - int ret; - - /* - * If the task doesn't have a stack (e.g., a zombie), the stack is - * "reliably" empty. 
- */ - if (!try_get_task_stack(tsk)) - return 0; - - ret = __save_stack_trace_tsk_reliable(tsk, trace); - - put_task_stack(tsk); - - return ret; -} -#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ - #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) static void handle_backtrace_ipi(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 078608ec2e92..a552c9e68d7e 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -82,16 +82,8 @@ int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) { if ( (unsigned long)n >= 4096 ) - { - unsigned long __user *buffer = (unsigned long __user *)n; - if (!access_ok(buffer, 5*sizeof(unsigned long)) - || __get_user(n, buffer) - || __get_user(inp, ((fd_set __user * __user *)(buffer+1))) - || __get_user(outp, ((fd_set __user * __user *)(buffer+2))) - || __get_user(exp, ((fd_set __user * __user *)(buffer+3))) - || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4)))) - return -EFAULT; - } + return sys_old_select((void __user *)n); + return sys_select(n, inp, outp, exp, tvp); } #endif diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile index 9e3be295dbba..5476f62eb80f 100644 --- a/arch/powerpc/kernel/syscalls/Makefile +++ b/arch/powerpc/kernel/syscalls/Makefile @@ -6,53 +6,38 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') syscall := $(src)/syscall.tbl -syshdr := $(srctree)/$(src)/syscallhdr.sh -systbl := $(srctree)/$(src)/syscalltbl.sh +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := $(srctree)/scripts/syscalltbl.sh quiet_cmd_syshdr = SYSHDR $@ - cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \ - '$(syshdr_abis_$(basetarget))' \ - '$(syshdr_pfx_$(basetarget))' \ - '$(syshdr_offset_$(basetarget))' + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr 
--abis $(abis) $< $@ quiet_cmd_systbl = SYSTBL $@ - cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \ - '$(systbl_abis_$(basetarget))' \ - '$(systbl_abi_$(basetarget))' \ - '$(systbl_offset_$(basetarget))' + cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@ -syshdr_abis_unistd_32 := common,nospu,32 +$(uapi)/unistd_32.h: abis := common,nospu,32 $(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) -syshdr_abis_unistd_64 := common,nospu,64 +$(uapi)/unistd_64.h: abis := common,nospu,64 $(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) -systbl_abis_syscall_table_32 := common,nospu,32 -systbl_abi_syscall_table_32 := 32 +$(kapi)/syscall_table_32.h: abis := common,nospu,32 $(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) -systbl_abis_syscall_table_64 := common,nospu,64 -systbl_abi_syscall_table_64 := 64 +$(kapi)/syscall_table_64.h: abis := common,nospu,64 $(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) -systbl_abis_syscall_table_c32 := common,nospu,32 -systbl_abi_syscall_table_c32 := c32 -$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE - $(call if_changed,systbl) - -systbl_abis_syscall_table_spu := common,spu -systbl_abi_syscall_table_spu := spu +$(kapi)/syscall_table_spu.h: abis := common,spu $(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) uapisyshdr-y += unistd_32.h unistd_64.h kapisyshdr-y += syscall_table_32.h \ syscall_table_64.h \ - syscall_table_c32.h \ syscall_table_spu.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh deleted file mode 100644 index 02d6751f3be3..000000000000 --- a/arch/powerpc/kernel/syscalls/syscallhdr.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -in="$1" -out="$2" -my_abis=`echo "($3)" | tr ',' '|'` -prefix="$4" -offset="$5" - 
-fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ - -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'` -grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( - printf "#ifndef %s\n" "${fileguard}" - printf "#define %s\n" "${fileguard}" - printf "\n" - - nxt=0 - while read nr abi name entry compat ; do - if [ -z "$offset" ]; then - printf "#define __NR_%s%s\t%s\n" \ - "${prefix}" "${name}" "${nr}" - else - printf "#define __NR_%s%s\t(%s + %s)\n" \ - "${prefix}" "${name}" "${offset}" "${nr}" - fi - nxt=$((nr+1)) - done - - printf "\n" - printf "#ifdef __KERNEL__\n" - printf "#define __NR_syscalls\t%s\n" "${nxt}" - printf "#endif\n" - printf "\n" - printf "#endif /* %s */\n" "${fileguard}" -) > "$out" diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh deleted file mode 100644 index f7393a7b18aa..000000000000 --- a/arch/powerpc/kernel/syscalls/syscalltbl.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -in="$1" -out="$2" -my_abis=`echo "($3)" | tr ',' '|'` -my_abi="$4" -offset="$5" - -emit() { - t_nxt="$1" - t_nr="$2" - t_entry="$3" - - while [ $t_nxt -lt $t_nr ]; do - printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}" - t_nxt=$((t_nxt+1)) - done - printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}" -} - -grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( - nxt=0 - if [ -z "$offset" ]; then - offset=0 - fi - - while read nr abi name entry compat ; do - if [ "$my_abi" = "c32" ] && [ ! 
-z "$compat" ]; then - emit $((nxt+offset)) $((nr+offset)) $compat - else - emit $((nxt+offset)) $((nr+offset)) $entry - fi - nxt=$((nr+1)) - done -) > "$out" diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index d34276f3c495..cb3358886203 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -21,6 +21,7 @@ #define __SYSCALL(nr, entry) .long entry #endif +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) .globl sys_call_table sys_call_table: #ifdef CONFIG_PPC64 @@ -30,8 +31,10 @@ sys_call_table: #endif #ifdef CONFIG_COMPAT +#undef __SYSCALL_WITH_COMPAT +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) .globl compat_sys_call_table compat_sys_call_table: #define compat_sys_sigsuspend sys_sigsuspend -#include <asm/syscall_table_c32.h> +#include <asm/syscall_table_32.h> #endif diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 42761ebec9f7..ffe9537195aa 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -68,7 +68,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) */ /* read the text we want to modify */ - if (probe_kernel_read_inst(&replaced, (void *)ip)) + if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -130,7 +130,7 @@ __ftrace_make_nop(struct module *mod, struct ppc_inst op, pop; /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } @@ -164,7 +164,7 @@ __ftrace_make_nop(struct module *mod, /* When using -mkernel_profile there is no load to jump over */ pop = ppc_inst(PPC_INST_NOP); - if (probe_kernel_read_inst(&op, (void *)(ip - 4))) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); 
return -EFAULT; } @@ -197,7 +197,7 @@ __ftrace_make_nop(struct module *mod, * Check what is in the next instruction. We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. */ - if (probe_kernel_read_inst(&op, (void *)(ip + 4))) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { pr_err("Fetching op failed.\n"); return -EFAULT; } @@ -349,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; /* New trampoline -- read where this goes */ - if (probe_kernel_read_inst(&op, (void *)tramp)) { + if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { pr_debug("Fetching opcode failed.\n"); return -1; } @@ -399,7 +399,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) struct ppc_inst op; /* Read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } @@ -526,10 +526,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) struct module *mod = rec->arch.mod; /* read where this goes */ - if (probe_kernel_read_inst(op, ip)) + if (copy_inst_from_kernel_nofault(op, ip)) return -EFAULT; - if (probe_kernel_read_inst(op + 1, ip + 4)) + if (copy_inst_from_kernel_nofault(op + 1, ip + 4)) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { @@ -592,7 +592,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned long ip = rec->ip; /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) return -EFAULT; /* It should be pointing to a nop */ @@ -648,7 +648,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } /* Make sure we have a nop */ - if (probe_kernel_read_inst(&op, ip)) { + if (copy_inst_from_kernel_nofault(&op, ip)) { pr_err("Unable to read ftrace location %p\n", ip); return -EFAULT; } @@ -726,7 +726,7 @@ 
__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a44a30b0688c..b4ab95c9e94a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -53,7 +53,6 @@ #ifdef CONFIG_PPC64 #include <asm/firmware.h> #include <asm/processor.h> -#include <asm/tm.h> #endif #include <asm/kexec.h> #include <asm/ppc-opcode.h> @@ -222,7 +221,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) return; crash_fadump(regs, "die oops"); @@ -290,7 +289,7 @@ void die(const char *str, struct pt_regs *regs, long err) /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) != 0x100) { + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) { if (debugger(regs)) return; } @@ -405,7 +404,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) * Now test if the interrupt has hit a range that may be using * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The * problem ranges all run un-relocated. Test real and virt modes - * at the same time by droping the high bit of the nip (virt mode + * at the same time by dropping the high bit of the nip (virt mode * entry points still have the +0x4000 offset). 
*/ nip &= ~0xc000000000000000ULL; @@ -864,7 +863,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) unsigned long ea, msr, msr_mask; bool swap; - if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip)) + if (__get_user(instr, (unsigned int __user *)regs->nip)) return; /* @@ -1079,6 +1078,16 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) _exception(SIGTRAP, regs, TRAP_UNK, 0); } +DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, TRAP_UNK, 0); + + return 0; +} + DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) { if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, @@ -1309,7 +1318,6 @@ static int emulate_instruction(struct pt_regs *regs) if (!user_mode(regs)) return -EINVAL; - CHECK_FULL_REGS(regs); if (get_user(instword, (u32 __user *)(regs->nip))) return -EFAULT; @@ -1406,7 +1414,6 @@ int is_valid_bugaddr(unsigned long addr) static int emulate_math(struct pt_regs *regs) { int ret; - extern int do_mathemu(struct pt_regs *regs); ret = do_mathemu(regs); if (ret >= 0) @@ -1606,15 +1613,6 @@ bad: bad_page_fault(regs, sig); } -DEFINE_INTERRUPT_HANDLER(StackOverflow) -{ - pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", - current->comm, task_pid_nr(current), regs->gpr[1]); - debugger(regs); - show_regs(regs); - panic("kernel stack overflow"); -} - DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { die("Kernel stack overflow", regs, SIGSEGV); @@ -1693,7 +1691,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) u8 status; bool hv; - hv = (TRAP(regs) == 0xf80); + hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL); if (hv) value = mfspr(SPRN_HFSCR); else @@ -2170,11 +2168,14 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. 
*/ -void unrecoverable_exception(struct pt_regs *regs) +void __noreturn unrecoverable_exception(struct pt_regs *regs) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); + /* die() should not return */ + for (;;) + ; } #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) @@ -2189,10 +2190,11 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) return; } -DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */ +DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException) { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); WatchdogHandler(regs); + return 0; } #endif diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index e8a63713e655..186f69b11e94 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, if (addr & 0x03) return -EINVAL; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + ppc_inst_prefixed(auprobe->insn) && + (addr & 0x3f) == 60) { + pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); + return -EINVAL; + } + return 0; } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index e839a906fdf2..717f2c9a7573 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -18,6 +18,7 @@ #include <linux/security.h> #include <linux/memblock.h> #include <linux/syscalls.h> +#include <linux/time_namespace.h> #include <vdso/datapage.h> #include <asm/syscall.h> @@ -50,15 +51,21 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_arch_data *vdso_data = &vdso_data_store.data; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) { unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - if 
(new_size != text_size + PAGE_SIZE) + if (new_size != text_size) return -EINVAL; - current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE; + current->mm->context.vdso = (void __user *)new_vma->vm_start; return 0; } @@ -73,6 +80,14 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); } +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf); + +static struct vm_special_mapping vvar_spec __ro_after_init = { + .name = "[vvar]", + .fault = vvar_fault, +}; + static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso32_mremap, @@ -83,17 +98,105 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = { .mremap = vdso64_mremap, }; +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return ((struct vdso_arch_data *)vvar_page)->data; +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). 
+ */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vvar_spec)) + zap_page_range(vma, vma->vm_start, size); + } + + mmap_read_unlock(mm); + return 0; +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = virt_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). 
+ */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = virt_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - struct mm_struct *mm = current->mm; + unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; + unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; + struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long vdso_size; - unsigned long vdso_base; if (is_32bit_task()) { vdso_spec = &vdso32_spec; @@ -110,8 +213,8 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int vdso_base = 0; } - /* Add a page to the vdso size for the data page */ - vdso_size += PAGE_SIZE; + mappings_size = vdso_size + vvar_size; + mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK; /* * pick a base address for the vDSO in process space. We try to put it @@ -119,9 +222,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * and end up putting it elsewhere. * Add enough to the size so that the result can be aligned. */ - vdso_base = get_unmapped_area(NULL, vdso_base, - vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), - 0, 0); + vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0); if (IS_ERR_VALUE(vdso_base)) return vdso_base; @@ -133,7 +234,13 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * install_special_mapping or the perf counter mmap tracking code * will fail to recognise it as a vDSO. 
*/ - mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE; + mm->context.vdso = (void __user *)vdso_base + vvar_size; + + vma = _install_special_mapping(mm, vdso_base, vvar_size, + VM_READ | VM_MAYREAD | VM_IO | + VM_DONTDUMP | VM_PFNMAP, &vvar_spec); + if (IS_ERR(vma)) + return PTR_ERR(vma); /* * our vma flags don't have VM_WRITE so by default, the process isn't @@ -145,9 +252,12 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * It's fine to use that for setting breakpoints in the vDSO code * pages though. */ - vma = _install_special_mapping(mm, vdso_base, vdso_size, + vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, vdso_spec); + if (IS_ERR(vma)) + do_munmap(mm, vdso_base, vvar_size, NULL); + return PTR_ERR_OR_ZERO(vma); } @@ -249,10 +359,8 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) if (!pagelist) panic("%s: Cannot allocate page list for VDSO", __func__); - pagelist[0] = virt_to_page(vdso_data); - for (i = 0; i < pages; i++) - pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE); + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); return pagelist; } diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index a4b806b0d618..58e0099f70f4 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_datapage = . - PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 2f3c359cacd3..0288cad428b0 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_datapage = . - PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); . 
= SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 801dc28fdcca..f5a52f444e36 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -67,9 +67,7 @@ _GLOBAL(load_up_altivec) #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ oris r9,r9,MSR_VEC@h -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif #else ld r4,PACACURRENT(r13) addi r5,r4,THREAD /* Get THREAD */ diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index c9a889880214..0196d0c211ac 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -24,6 +24,7 @@ #include <asm/smp.h> #include <asm/setjmp.h> #include <asm/debug.h> +#include <asm/interrupt.h> /* * The primary CPU waits a while for all secondary CPUs to enter. This is to @@ -336,7 +337,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * If we came in via system reset, wait a while for the secondary * CPUs to enter. 
*/ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e452158a18d7..c3e31fef0be1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -8,6 +8,7 @@ */ #include <linux/kvm_host.h> +#include <linux/pkeys.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -133,6 +134,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, else kvmppc_mmu_flush_icache(pfn); + rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY); rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg; /* diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 13bad6bf4c95..4a532410e128 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawrx1 = value2; return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: - /* KVM does not support mflags=2 (AIL=2) */ + /* + * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved. + * Keep this in synch with kvmppc_filter_lpcr_hv. + */ if (mflags != 0 && mflags != 3) return H_UNSUPPORTED_FLAG_START; return H_TOO_HARD; @@ -1635,6 +1638,41 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } +/* + * Enforce limits on guest LPCR values based on hardware availability, + * guest configuration, and possibly hypervisor support and security + * concerns. 
+ */ +unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr) +{ + /* LPCR_TC only applies to HPT guests */ + if (kvm_is_radix(kvm)) + lpcr &= ~LPCR_TC; + + /* On POWER8 and above, userspace can modify AIL */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr &= ~LPCR_AIL; + if ((lpcr & LPCR_AIL) != LPCR_AIL_3) + lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */ + + /* + * On POWER9, allow userspace to enable large decrementer for the + * guest, whether or not the host has it enabled. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + lpcr &= ~LPCR_LD; + + return lpcr; +} + +static void verify_lpcr(struct kvm *kvm, unsigned long lpcr) +{ + if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) { + WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n", + lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr)); + } +} + static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, bool preserve_top32) { @@ -1643,6 +1681,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, u64 mask; spin_lock(&vc->lock); + + /* + * Userspace can only modify + * DPFD (default prefetch depth), ILE (interrupt little-endian), + * TC (translation control), AIL (alternate interrupt location), + * LD (large decrementer). + * These are subject to restrictions from kvmppc_filter_lpcr_hv(). + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; + + new_lpcr = kvmppc_filter_lpcr_hv(kvm, + (vc->lpcr & ~mask) | (new_lpcr & mask)); + /* * If ILE (interrupt little-endian) has changed, update the * MSR_LE bit in the intr_msr for each vcpu in this vcore. @@ -1661,25 +1716,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, } } - /* - * Userspace can only modify DPFD (default prefetch depth), - * ILE (interrupt little-endian) and TC (translation control). - * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.). 
- */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - mask |= LPCR_AIL; - /* - * On POWER9, allow userspace to enable large decrementer for the - * guest, whether or not the host has it enabled. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= LPCR_LD; + vc->lpcr = new_lpcr; - /* Broken 32-bit version of LPCR must not clear top bits */ - if (preserve_top32) - mask &= 0xFFFFFFFF; - vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -3728,7 +3766,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; vcpu->arch.thread_cpu = -1; + /* Save guest CTRL register, set runlatch to 1 */ vcpu->arch.ctrl = mfspr(SPRN_CTRLF); + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1); vcpu->arch.iamr = mfspr(SPRN_IAMR); vcpu->arch.pspb = mfspr(SPRN_PSPB); @@ -3749,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_DSCR, host_dscr); mtspr(SPRN_TIDR, host_tidr); mtspr(SPRN_IAMR, host_iamr); - mtspr(SPRN_PSPB, 0); if (host_amr != vcpu->arch.amr) mtspr(SPRN_AMR, host_amr); @@ -4641,8 +4681,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) struct kvmppc_vcore *vc = kvm->arch.vcores[i]; if (!vc) continue; + spin_lock(&vc->lock); vc->lpcr = (vc->lpcr & ~mask) | lpcr; + verify_lpcr(kvm, vc->lpcr); spin_unlock(&vc->lock); if (++cores_done >= kvm->arch.online_vcores) break; @@ -4970,6 +5012,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvmppc_setup_partition_table(kvm); } + verify_lpcr(kvm, lpcr); kvm->arch.lpcr = lpcr; /* Initialization for future HPT resizes */ @@ -5369,8 +5412,10 @@ static unsigned int default_hcall_list[] = { H_READ, H_PROTECT, H_BULK_REMOVE, +#ifdef CONFIG_SPAPR_TCE_IOMMU H_GET_TCE, H_PUT_TCE, +#endif H_SET_DABR, H_SET_XDABR, H_CEDE, diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c 
index 158d309b42a3..7a0e33a9c980 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { + /* Guest must always run with ME enabled, HV disabled. */ + msr = (msr | MSR_ME) & ~MSR_HV; + /* * Check for illegal transactional state bit combination * and if we find it, force the TS field to a safe state. diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 0cd0e7aad588..60724f674421 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -132,8 +132,33 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, } } +/* + * This can result in some L0 HV register state being leaked to an L1 + * hypervisor when the hv_guest_state is copied back to the guest after + * being modified here. + * + * There is no known problem with such a leak, and in many cases these + * register settings could be derived by the guest by observing behaviour + * and timing, interrupts, etc., but it is an issue to consider. + */ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) { + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 mask; + + /* + * Don't let L1 change LPCR bits for the L2 except these: + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | + LPCR_LPES | LPCR_MER; + + /* + * Additional filtering is required depending on hardware + * and configuration. + */ + hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, + (vc->lpcr & ~mask) | (hr->lpcr & mask)); + /* * Don't let L1 enable features for L2 which we've disabled for L1, * but preserve the interrupt cause field. 
@@ -271,8 +296,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) u64 hv_ptr, regs_ptr; u64 hdec_exp; s64 delta_purr, delta_spurr, delta_ic, delta_vtb; - u64 mask; - unsigned long lpcr; if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; @@ -320,10 +343,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; vcpu->arch.regs = l2_regs; - vcpu->arch.shregs.msr = vcpu->arch.regs.msr; - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; - lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask); + + /* Guest must always run with ME enabled, HV disabled. */ + vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV; + sanitise_hv_regs(vcpu, &l2_hv); restore_hv_regs(vcpu, &l2_hv); @@ -335,7 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 88da2764c1bb..7af7c70f1468 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) } long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va) + unsigned long pte_index, unsigned long avpn) { struct kvm *kvm = vcpu->kvm; __be64 *hpte; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d4efc182662a..f2c690ee75d1 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o +obj-y += alloc.o 
code-patching.o feature-fixups.o pmem.o test_code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index b895166afc82..f3999cbb2fcc 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -16,16 +16,12 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, { __wsum csum; - might_sleep(); - - if (unlikely(!access_ok(src, len))) + if (unlikely(!user_read_access_begin(src, len))) return 0; - allow_read_from_user(src, len); - csum = csum_partial_copy_generic((void __force *)src, dst, len); - prevent_read_from_user(src, len); + user_read_access_end(); return csum; } EXPORT_SYMBOL(csum_and_copy_from_user); @@ -34,15 +30,12 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len) { __wsum csum; - might_sleep(); - if (unlikely(!access_ok(dst, len))) + if (unlikely(!user_write_access_begin(dst, len))) return 0; - allow_write_to_user(dst, len); - csum = csum_partial_copy_generic(src, (void __force *)dst, len); - prevent_write_to_user(dst, len); + user_write_access_end(); return csum; } EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 2333625b5e31..870b30d9be2f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -21,10 +21,15 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, struct ppc_inst *patch_addr) { - if (!ppc_inst_prefixed(instr)) - __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw"); - else - __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std"); + if (!ppc_inst_prefixed(instr)) { + u32 val = ppc_inst_val(instr); + + __put_kernel_nofault(patch_addr, &val, u32, failed); + } else { + u64 val = ppc_inst_as_ulong(instr); + + __put_kernel_nofault(patch_addr, &val, u64, failed); + } asm ("dcbst 0, %0; sync; icbi 0,%1; 
sync; isync" :: "r" (patch_addr), "r" (exec_addr)); diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c deleted file mode 100644 index 9cc17eb62462..000000000000 --- a/arch/powerpc/lib/inst.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2020, IBM Corporation. - */ - -#include <linux/uaccess.h> -#include <asm/disassemble.h> -#include <asm/inst.h> -#include <asm/ppc-opcode.h> - -#ifdef CONFIG_PPC64 -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip) -{ - unsigned int val, suffix; - int err; - - err = copy_from_user_nofault(&val, nip, sizeof(val)); - if (err) - return err; - if (get_op(val) == OP_PREFIX) { - err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} - -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (err) - return err; - if (get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} -#else /* !CONFIG_PPC64 */ -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip) -{ - unsigned int val; - int err; - - err = copy_from_user_nofault(&val, nip, sizeof(val)); - if (!err) - *inst = ppc_inst(val); - - return err; -} - -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src) -{ - unsigned int val; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (!err) - *inst = ppc_inst(val); - - return err; -} -#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c6aebc149d14..45bda2520755 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1401,10 +1401,6 @@ int analyse_instr(struct 
instruction_op *op, const struct pt_regs *regs, break; } - /* Following cases refer to regs->gpr[], so we need all regs */ - if (!FULL_REGS(regs)) - return -1; - rd = (word >> 21) & 0x1f; ra = (word >> 16) & 0x1f; rb = (word >> 11) & 0x1f; @@ -3086,15 +3082,6 @@ NOKPROBE_SYMBOL(analyse_instr); */ static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel stack overflow - */ - if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); - return -EINVAL; - } -#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 30b4b69c6941..327165f26ca6 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -225,7 +225,7 @@ record_exception(struct pt_regs *regs, int eflag) int do_mathemu(struct pt_regs *regs) { - void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0; + void *op0 = NULL, *op1 = NULL, *op2 = NULL, *op3 = NULL; unsigned long pc = regs->nip; signed short sdisp; u32 insn = 0; @@ -234,7 +234,7 @@ do_mathemu(struct pt_regs *regs) int type = 0; int eflag, trap; - if (get_user(insn, (u32 *)pc)) + if (get_user(insn, (u32 __user *)pc)) return -EFAULT; switch (insn >> 26) { diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3b4e9e4e25ea..c3df3a8501d4 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ - init-common.o mmu_context.o drmem.o + init-common.o mmu_context.o drmem.o \ + cacheflush.o obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ diff --git 
a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 446d9de88ce4..7f0c8a78ba0c 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -9,3 +9,4 @@ endif obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o +obj-$(CONFIG_PPC_KUEP) += kuep.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 0e6dc830c38b..fb4233a5bdf7 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -140,10 +140,6 @@ _GLOBAL(hash_page) bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ -#ifndef CONFIG_VMAP_STACK - mfctr r0 - stw r0,_CTR(r11) -#endif bl create_hpte /* add the hash table entry */ #ifdef CONFIG_SMP @@ -152,17 +148,7 @@ _GLOBAL(hash_page) li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) #endif - -#ifdef CONFIG_VMAP_STACK b fast_hash_page_return -#else - /* Return from the exception */ - lwz r5,_CTR(r11) - mtctr r5 - lwz r0,GPR0(r11) - lwz r8,GPR8(r11) - b fast_exception_return -#endif #ifdef CONFIG_SMP .Lhash_page_out: diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c new file mode 100644 index 000000000000..8ed1b8634839 --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <asm/kup.h> +#include <asm/reg.h> +#include <asm/task_size_32.h> +#include <asm/mmu.h> + +#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ + if (TASK_SIZE > ((n) << 28)) \ + mtsr(val1, (n) << 28); \ + if (TASK_SIZE > (((n) + 1) << 28)) \ + mtsr(val2, ((n) + 1) << 28); \ + val1 = (val1 + 0x222) & 0xf0ffffff; \ + val2 = (val2 + 0x222) & 0xf0ffffff; \ +} while (0) + +static __always_inline void kuep_update(u32 val) +{ + int val1 = val; + int val2 = (val + 0x111) & 0xf0ffffff; + + KUEP_UPDATE_TWO_USER_SEGMENTS(0); + KUEP_UPDATE_TWO_USER_SEGMENTS(2); + 
KUEP_UPDATE_TWO_USER_SEGMENTS(4); + KUEP_UPDATE_TWO_USER_SEGMENTS(6); + KUEP_UPDATE_TWO_USER_SEGMENTS(8); + KUEP_UPDATE_TWO_USER_SEGMENTS(10); + KUEP_UPDATE_TWO_USER_SEGMENTS(12); + KUEP_UPDATE_TWO_USER_SEGMENTS(14); +} + +void kuep_lock(void) +{ + kuep_update(mfsr(0) | SR_NX); +} + +void kuep_unlock(void) +{ + kuep_update(mfsr(0) & ~SR_NX); +} diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d7eb266a3f7a..159930351d9f 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (debug_pagealloc_enabled() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; @@ -184,17 +184,10 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; -#ifdef MODULES_VADDR if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; if (addr > ALIGN(MODULES_END, SZ_256M) - 1) return false; -#else - if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) - return false; - if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) - return false; -#endif return true; } diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 567e0c6b3978..ad5eff097d31 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/mm.h> +#include <linux/stop_machine.h> #include <asm/sections.h> #include <asm/mmu.h> @@ -400,10 +401,103 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX + +struct change_memory_parms { + unsigned long start, end, newpp; + unsigned int step, nr_cpus, master_cpu; + atomic_t 
cpu_counter; +}; + +// We'd rather this was on the stack but it has to be in the RMO +static struct change_memory_parms chmem_parms; + +// And therefore we need a lock to protect it from concurrent use +static DEFINE_MUTEX(chmem_lock); + +static void change_memory_range(unsigned long start, unsigned long end, + unsigned int step, unsigned long newpp) +{ + unsigned long idx; + + pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", + start, end, newpp, step); + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); +} + +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) +{ + unsigned long msr, tmp, flags; + int *p; + + p = &parms->cpu_counter.counter; + + local_irq_save(flags); + hard_irq_disable(); + + asm volatile ( + // Switch to real mode and leave interrupts off + "mfmsr %[msr] ;" + "li %[tmp], %[MSR_IR_DR] ;" + "andc %[tmp], %[msr], %[tmp] ;" + "mtmsrd %[tmp] ;" + + // Tell the master we are in real mode + "1: " + "lwarx %[tmp], 0, %[p] ;" + "addic %[tmp], %[tmp], -1 ;" + "stwcx. 
%[tmp], 0, %[p] ;" + "bne- 1b ;" + + // Spin until the counter goes to zero + "2: ;" + "lwz %[tmp], 0(%[p]) ;" + "cmpwi %[tmp], 0 ;" + "bne- 2b ;" + + // Switch back to virtual mode + "mtmsrd %[msr] ;" + + : // outputs + [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p) + : // inputs + [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR) + : // clobbers + "cc", "xer" + ); + + local_irq_restore(flags); + + return 0; +} + +static int change_memory_range_fn(void *data) +{ + struct change_memory_parms *parms = data; + + if (parms->master_cpu != smp_processor_id()) + return chmem_secondary_loop(parms); + + // Wait for all but one CPU (this one) to call-in + while (atomic_read(&parms->cpu_counter) > 1) + barrier(); + + change_memory_range(parms->start, parms->end, parms->step, parms->newpp); + + mb(); + + // Signal the other CPUs that we're done + atomic_dec(&parms->cpu_counter); + + return 0; +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { - unsigned long idx; unsigned int step, shift; shift = mmu_psize_defs[mmu_linear_psize].shift; @@ -415,25 +509,43 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", - start, end, newpp, step); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + mutex_lock(&chmem_lock); - for (idx = start; idx < end; idx += step) - /* Not sure if we can do much with the return value */ - mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, - mmu_kernel_ssize); + chmem_parms.start = start; + chmem_parms.end = end; + chmem_parms.step = step; + chmem_parms.newpp = newpp; + chmem_parms.master_cpu = smp_processor_id(); + + cpus_read_lock(); + + atomic_set(&chmem_parms.cpu_counter, num_online_cpus()); + + // Ensure state is consistent before we call the other CPUs + mb(); + + stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms, + cpu_online_mask); + + 
cpus_read_unlock(); + mutex_unlock(&chmem_lock); + } else + change_memory_range(start, end, step, newpp); return true; } void hash__mark_rodata_ro(void) { - unsigned long start, end; + unsigned long start, end, pp; start = (unsigned long)_stext; end = (unsigned long)__init_begin; - WARN_ON(!hash__change_memory_range(start, end, PP_RXXX)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); + + WARN_ON(!hash__change_memory_range(start, end, pp)); } void hash__mark_initmem_nx(void) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 581b20a2feaf..96d9aa164007 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -338,7 +338,7 @@ repeat: int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { - unsigned long vaddr; + unsigned long vaddr, time_limit; unsigned int step, shift; int rc; int ret = 0; @@ -351,8 +351,19 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, /* Unmap the full range specificied */ vaddr = ALIGN_DOWN(vstart, step); + time_limit = jiffies + HZ; + for (;vaddr < vend; vaddr += step) { rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); + + /* + * For large number of mappings introduce a cond_resched() + * to prevent softlockup warnings. 
+ */ + if (time_after(jiffies, time_limit)) { + cond_resched(); + time_limit = jiffies + HZ; + } if (rc == -ENOENT) { ret = -ENOENT; continue; @@ -1145,7 +1156,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) /* page is dirty */ if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { - if (trap == 0x400) { + if (trap == INTERRUPT_INST_STORAGE) { flush_dcache_icache_page(page); set_bit(PG_dcache_clean, &page->flags); } else @@ -1545,10 +1556,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (regs->trap == 0x400) + if (TRAP(regs) == INTERRUPT_INST_STORAGE) access |= _PAGE_EXEC; - err = hash_page_mm(mm, ea, access, regs->trap, flags); + err = hash_page_mm(mm, ea, access, TRAP(regs), flags); if (unlikely(err < 0)) { // failed to instert a hash PTE due to an hypervisor error if (user_mode(regs)) { @@ -1572,10 +1583,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { unsigned long dsisr = regs->dsisr; - long err; - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) - goto page_fault; + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) { + hash__do_page_fault(regs); + return 0; + } /* * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then @@ -1595,13 +1607,10 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) return 0; } - err = __do_hash_fault(regs); - if (err) { -page_fault: - err = hash__do_page_fault(regs); - } + if (__do_hash_fault(regs)) + hash__do_page_fault(regs); - return err; + return 0; } #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 0c8557220ae2..c10fc8a72fb3 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -119,7 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) /* This is fork. 
Copy hash_context details from current->mm */ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context)); #ifdef CONFIG_PPC_SUBPAGE_PROT - /* inherit subpage prot detalis if we have one. */ + /* inherit subpage prot details if we have one. */ if (current->mm->context.hash_context->spt) { mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 15dcc5ad91c5..a2d9ad138709 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -301,19 +301,6 @@ void setup_kuap(bool disabled) } #endif -static inline void update_current_thread_amr(u64 value) -{ - current->thread.regs->amr = value; -} - -static inline void update_current_thread_iamr(u64 value) -{ - if (!likely(pkey_execute_disable_supported)) - return; - - current->thread.regs->iamr = value; -} - #ifdef CONFIG_PPC_MEM_KEYS void pkey_mm_init(struct mm_struct *mm) { @@ -328,7 +315,7 @@ static inline void init_amr(int pkey, u8 init_bits) u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - update_current_thread_amr(old_amr | new_amr_bits); + current->thread.regs->amr = old_amr | new_amr_bits; } static inline void init_iamr(int pkey, u8 init_bits) @@ -336,7 +323,10 @@ static inline void init_iamr(int pkey, u8 init_bits) u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - update_current_thread_iamr(old_iamr | new_iamr_bits); + if (!likely(pkey_execute_disable_supported)) + return; + + current->thread.regs->iamr = old_iamr | new_iamr_bits; } /* diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 98f0b243c1ab..50d536ecc89b 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ 
b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -180,8 +180,8 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } #ifdef CONFIG_STRICT_KERNEL_RWX -void radix__change_memory_range(unsigned long start, unsigned long end, - unsigned long clear) +static void radix__change_memory_range(unsigned long start, unsigned long end, + unsigned long clear) { unsigned long idx; pgd_t *pgdp; @@ -1058,7 +1058,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, * Book3S does not require a TLB flush when relaxing access * restrictions when the address space is not attached to a * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architectue. + * an access fault, which is defined by the architecture. */ } /* See ptesync comment in radix__set_pte_at */ diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c new file mode 100644 index 000000000000..63363787e000 --- /dev/null +++ b/arch/powerpc/mm/cacheflush.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/highmem.h> +#include <linux/kprobes.h> + +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(void) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. 
We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + icbi((void *)PAGE_OFFSET); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache()) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. 
+ */ + iccci((void *)start); + mb(); /* sync */ + isync(); + } else + invalidate_icache_range(start, stop); +} +EXPORT_SYMBOL(flush_icache_range); + +#ifdef CONFIG_HIGHMEM +/** + * flush_dcache_icache_phys() - Flush a page by it's physical address + * @physaddr: the physical address of the page + */ +static void flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + /* + * This must remain as ASM to prevent potential memory accesses + * while the data MMU is disabled + */ + asm volatile( + " mtctr %2;\n" + " mtmsr %3;\n" + " isync;\n" + "0: dcbst 0, %0;\n" + " addi %0, %0, %4;\n" + " bdnz 0b;\n" + " sync;\n" + " mtctr %2;\n" + "1: icbi 0, %1;\n" + " addi %1, %1, %4;\n" + " bdnz 1b;\n" + " sync;\n" + " mtmsr %5;\n" + " isync;\n" + : "+&r" (loop1), "+&r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); +} +NOKPROBE_SYMBOL(flush_dcache_icache_phys) +#else +static void flush_dcache_icache_phys(unsigned long physaddr) +{ +} +#endif + +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @p: the address of the page to flush + */ +static void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p & PAGE_MASK; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. 
+ */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + +static void flush_dcache_icache_hugepage(struct page *page) +{ + int i; + int nr = compound_nr(page); + + if (!PageHighMem(page)) { + for (i = 0; i < nr; i++) + __flush_dcache_icache(lowmem_page_address(page + i)); + } else { + for (i = 0; i < nr; i++) { + void *start = kmap_local_page(page + i); + + __flush_dcache_icache(start); + kunmap_local(start); + } + } +} + +void flush_dcache_icache_page(struct page *page) +{ + if (flush_coherent_icache()) + return; + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + + if (!PageHighMem(page)) { + __flush_dcache_icache(lowmem_page_address(page)); + } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { + void *start = kmap_local_page(page); + + __flush_dcache_icache(start); + kunmap_local(start); + } else { + flush_dcache_icache_phys(page_to_phys(page)); + } +} +EXPORT_SYMBOL(flush_dcache_icache_page); + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + /* + * We shouldn't have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + flush_dcache_page(pg); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. 
+ * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + flush_dcache_page(pg); +} + +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + void *maddr; + + maddr = kmap_local_page(page) + (addr & ~PAGE_MASK); + flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len); + kunmap_local(maddr); +} diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index bb368257b55c..34f641d4a2fe 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,8 @@ #include <linux/context_tracking.h> #include <linux/hugetlb.h> #include <linux/uaccess.h> +#include <linux/kfence.h> +#include <linux/pkeys.h> #include <asm/firmware.h> #include <asm/interrupt.h> @@ -87,7 +89,6 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -#ifdef CONFIG_PPC_MEM_KEYS static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma) { @@ -127,7 +128,6 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, return 0; } -#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -197,7 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address, bool is_write) { - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | @@ -234,7 +234,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -#ifdef CONFIG_PPC_MEM_KEYS static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, struct vm_area_struct *vma) { @@ -248,7 +247,6 @@ static bool 
access_pkey_error(bool is_write, bool is_exec, bool is_pkey, return false; } -#endif static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { @@ -393,7 +391,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_DEFAULT; - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; @@ -418,8 +416,12 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, * take a page fault to a kernel address or a page fault to a user * address outside of dedicated places */ - if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) + if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) { + if (kfence_handle_page_fault(address, is_write, regs)) + return 0; + return SIGSEGV; + } /* * If we're in an interrupt, have no user context or are running @@ -492,11 +494,9 @@ retry: return bad_area(regs, address); } -#ifdef CONFIG_PPC_MEM_KEYS if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) return bad_access_pkey(regs, address, vma); -#endif /* CONFIG_PPC_MEM_KEYS */ if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); @@ -539,39 +539,25 @@ retry: } NOKPROBE_SYMBOL(___do_page_fault); -static long __do_page_fault(struct pt_regs *regs) +static __always_inline void __do_page_fault(struct pt_regs *regs) { - const struct exception_table_entry *entry; long err; err = ___do_page_fault(regs, regs->dar, regs->dsisr); - if (likely(!err)) - return err; - - entry = search_exception_tables(regs->nip); - if (likely(entry)) { - instruction_pointer_set(regs, extable_fixup(entry)); - return 0; - } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - __bad_page_fault(regs, err); - return 0; - } 
else { - /* 32 and 64e handle the bad page fault in asm */ - return err; - } + if (unlikely(err)) + bad_page_fault(regs, err); } -NOKPROBE_SYMBOL(__do_page_fault); -DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +DEFINE_INTERRUPT_HANDLER(do_page_fault) { - return __do_page_fault(regs); + __do_page_fault(regs); } #ifdef CONFIG_PPC_BOOK3S_64 /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ -long hash__do_page_fault(struct pt_regs *regs) +void hash__do_page_fault(struct pt_regs *regs) { - return __do_page_fault(regs); + __do_page_fault(regs); } NOKPROBE_SYMBOL(hash__do_page_fault); #endif @@ -581,27 +567,27 @@ NOKPROBE_SYMBOL(hash__do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, int sig) +static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); /* kernel has accessed a bad area */ switch (TRAP(regs)) { - case 0x300: - case 0x380: - case 0xe00: + case INTERRUPT_DATA_STORAGE: + case INTERRUPT_DATA_SEGMENT: + case INTERRUPT_H_DATA_STORAGE: pr_alert("BUG: %s on %s at 0x%08lx\n", regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : "Unable to handle kernel data access", is_write ? "write" : "read", regs->dar); break; - case 0x400: - case 0x480: + case INTERRUPT_INST_STORAGE: + case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", regs->nip < PAGE_SIZE ? 
" (NULL pointer?)\n" : "\n"); break; - case 0x600: + case INTERRUPT_ALIGNMENT: pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", regs->dar); break; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 02c7db4087cb..3d690be48e84 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -97,6 +97,9 @@ static void __init MMU_setup(void) if (IS_ENABLED(CONFIG_PPC_8xx)) return; + if (IS_ENABLED(CONFIG_KFENCE)) + __map_without_ltlbs = 1; + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index fa9a7a718fc6..a3c30a884076 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -3,7 +3,28 @@ #include <linux/uaccess.h> #include <linux/kernel.h> +#include <asm/disassemble.h> +#include <asm/inst.h> +#include <asm/ppc-opcode.h> + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } + +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = copy_from_kernel_nofault(&val, src, sizeof(val)); + if (err) + return err; + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4e8ce6d85232..6564b4d81324 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -12,49 +12,18 @@ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ -#include <linux/export.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/init.h> #include <linux/memblock.h> #include <linux/highmem.h> -#include 
<linux/initrd.h> -#include <linux/pagemap.h> #include <linux/suspend.h> -#include <linux/hugetlb.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/memremap.h> #include <linux/dma-direct.h> -#include <linux/kprobes.h> -#include <asm/prom.h> -#include <asm/io.h> -#include <asm/mmu_context.h> -#include <asm/mmu.h> -#include <asm/smp.h> #include <asm/machdep.h> -#include <asm/btext.h> -#include <asm/tlb.h> -#include <asm/sections.h> -#include <asm/sparsemem.h> -#include <asm/vdso.h> -#include <asm/fixmap.h> -#include <asm/swiotlb.h> #include <asm/rtas.h> #include <asm/kasan.h> #include <asm/svm.h> -#include <asm/mmzone.h> #include <mm/mmu_decl.h> -static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -72,6 +41,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, EXPORT_SYMBOL(phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG +static DEFINE_MUTEX(linear_mapping_mutex); #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 start) @@ -340,257 +310,6 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -/** - * flush_coherent_icache() - if a CPU has a coherent icache, flush it - * @addr: The base address to use (can be any valid address, the whole cache will be flushed) - * Return true if the cache was flushed, false otherwise - */ -static inline bool flush_coherent_icache(unsigned long addr) -{ - /* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. 
- */ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { - mb(); /* sync */ - allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - icbi((void *)addr); - prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - mb(); /* sync */ - isync(); - return true; - } - - return false; -} - -/** - * invalidate_icache_range() - Flush the icache by issuing icbi across an address range - * @start: the start address - * @stop: the stop address (exclusive) - */ -static void invalidate_icache_range(unsigned long start, unsigned long stop) -{ - unsigned long shift = l1_icache_shift(); - unsigned long bytes = l1_icache_bytes(); - char *addr = (char *)(start & ~(bytes - 1)); - unsigned long size = stop - (unsigned long)addr + (bytes - 1); - unsigned long i; - - for (i = 0; i < size >> shift; i++, addr += bytes) - icbi(addr); - - mb(); /* sync */ - isync(); -} - -/** - * flush_icache_range: Write any modified data cache blocks out to memory - * and invalidate the corresponding blocks in the instruction cache - * - * Generic code will call this after writing memory, before executing from it. - * - * @start: the start address - * @stop: the stop address (exclusive) - */ -void flush_icache_range(unsigned long start, unsigned long stop) -{ - if (flush_coherent_icache(start)) - return; - - clean_dcache_range(start, stop); - - if (IS_ENABLED(CONFIG_44x)) { - /* - * Flash invalidate on 44x because we are passed kmapped - * addresses and this doesn't work for userspace pages due to - * the virtually tagged icache. 
- */ - iccci((void *)start); - mb(); /* sync */ - isync(); - } else - invalidate_icache_range(start, stop); -} -EXPORT_SYMBOL(flush_icache_range); - -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) -/** - * flush_dcache_icache_phys() - Flush a page by it's physical address - * @physaddr: the physical address of the page - */ -static void flush_dcache_icache_phys(unsigned long physaddr) -{ - unsigned long bytes = l1_dcache_bytes(); - unsigned long nb = PAGE_SIZE / bytes; - unsigned long addr = physaddr & PAGE_MASK; - unsigned long msr, msr0; - unsigned long loop1 = addr, loop2 = addr; - - msr0 = mfmsr(); - msr = msr0 & ~MSR_DR; - /* - * This must remain as ASM to prevent potential memory accesses - * while the data MMU is disabled - */ - asm volatile( - " mtctr %2;\n" - " mtmsr %3;\n" - " isync;\n" - "0: dcbst 0, %0;\n" - " addi %0, %0, %4;\n" - " bdnz 0b;\n" - " sync;\n" - " mtctr %2;\n" - "1: icbi 0, %1;\n" - " addi %1, %1, %4;\n" - " bdnz 1b;\n" - " sync;\n" - " mtmsr %5;\n" - " isync;\n" - : "+&r" (loop1), "+&r" (loop2) - : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) - : "ctr", "memory"); -} -NOKPROBE_SYMBOL(flush_dcache_icache_phys) -#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. 
- */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - -static void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - -void flush_dcache_icache_page(struct page *page) -{ - - if (PageCompound(page)) - return flush_dcache_icache_hugepage(page); - -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) - /* On 8xx there is no need to kmap since highmem is not supported */ - __flush_dcache_icache(page_address(page)); -#else - if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { - void *start = kmap_atomic(page); - __flush_dcache_icache(start); - kunmap_atomic(start); - } else { - unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - - if (flush_coherent_icache(addr)) - return; - flush_dcache_icache_phys(addr); - } -#endif -} -EXPORT_SYMBOL(flush_dcache_icache_page); - -/** - * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * @page: the address of the page to flush - */ -void __flush_dcache_icache(void *p) -{ - unsigned long addr = (unsigned long)p; - - if (flush_coherent_icache(addr)) - return; - - clean_dcache_range(addr, addr + PAGE_SIZE); - - /* - * We don't flush the icache on 44x. Those have a virtual icache and we - * don't have access to the virtual address here (it's not the page - * vaddr but where it's mapped in user space). 
The flushing of the - * icache on these is handled elsewhere, when a change in the address - * space occurs, before returning to user space. - */ - - if (mmu_has_feature(MMU_FTR_TYPE_44x)) - return; - - invalidate_icache_range(addr, addr + PAGE_SIZE); -} - -void clear_user_page(void *page, unsigned long vaddr, struct page *pg) -{ - clear_page(page); - - /* - * We shouldn't have to do this, but some versions of glibc - * require it (ld.so assumes zero filled pages are icache clean) - * - Anton - */ - flush_dcache_page(pg); -} -EXPORT_SYMBOL(clear_user_page); - -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) -{ - copy_page(vto, vfrom); - - /* - * We should be able to use the following optimisation, however - * there are two problems. - * Firstly a bug in some versions of binutils meant PLT sections - * were not marked executable. - * Secondly the first word in the GOT section is blrl, used - * to establish the GOT address. Until recently the GOT was - * not marked executable. - * - Anton - */ -#if 0 - if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) - return; -#endif - - flush_dcache_page(pg); -} - -void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - unsigned long maddr; - - maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); - kunmap(page); -} - /* * System memory should not be in /proc/iomem but various tools expect it * (eg kdump). diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d348..a857af401738 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -43,24 +43,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * This full barrier orders the store to the cpumask above vs - * a subsequent operation which allows this CPU to begin loading - * translations for next. 
+ * a subsequent load which allows this CPU/MMU to begin loading + * translations for 'next' from page table PTEs into the TLB. * - * When using the radix MMU that operation is the load of the + * When using the radix MMU, that operation is the load of the * MMU context id, which is then moved to SPRN_PID. * * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or the store of paca->mm_ctx_id in - * copy_mm_to_paca(). + * in switch_slb() to preload the SLBs, or the load of + * get_user_context which loads the context for the VSID hash + * to insert a new SLB, in the SLB fault handler. * * On the other side, the barrier is in mm/tlb-radix.c for - * radix which orders earlier stores to clear the PTEs vs - * the load of mm_cpumask. And pte_xchg which does the same - * thing for hash. + * radix which orders earlier stores to clear the PTEs before + * the load of mm_cpumask to check which CPU TLBs should be + * flushed. For hash, pte_xchg to clear the PTE includes the + * barrier. * - * This full barrier is needed by membarrier when switching - * between processes after store to rq->curr, before user-space - * memory accesses. + * This full barrier is also needed by membarrier when + * switching between processes after store to rq->curr, before + * user-space memory accesses. 
*/ smp_mb(); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 998810e68562..7dac910c0b21 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -185,3 +185,8 @@ void ptdump_check_wx(void); #else static inline void ptdump_check_wx(void) { } #endif + +static inline bool debug_pagealloc_enabled_or_kfence(void) +{ + return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); +} diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 19a3eec1d8c5..71bfdbedacee 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -149,7 +149,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); - bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence(); unsigned long boundary = strict_boundary ? 
sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); @@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return 0; mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); - if (debug_pagealloc_enabled()) { + if (debug_pagealloc_enabled_or_kfence()) { top = boundary; } else { mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index c2dec3a68d4c..8e60af32e51e 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,8 +2,4 @@ # # Arch-specific network modules # -ifdef CONFIG_PPC64 -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o -else -obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o -endif +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index d0a67a1bbaf1..99fad093f43e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -26,6 +26,9 @@ /* Long jump; (unconditional 'branch') */ #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +/* bl; (unconditional 'branch' with link) to absolute address */ +#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ + (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ (((cond) & 0x3ff) << 16) | \ @@ -42,6 +45,10 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) +#ifdef CONFIG_PPC32 +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? 
-1 : 0)) +#endif + #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -108,6 +115,63 @@ static inline bool is_nearbranch(int offset) #define COND_LT (CR0_LT | COND_CMP_TRUE) #define COND_LE (CR0_GT | COND_CMP_FALSE) +#define SEEN_FUNC 0x20000000 /* might call external helpers */ +#define SEEN_STACK 0x40000000 /* uses BPF stack */ +#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ + +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ + +#ifdef CONFIG_PPC64 +extern const int b2p[MAX_BPF_JIT_REG + 2]; +#else +extern const int b2p[MAX_BPF_JIT_REG + 1]; +#endif + +struct codegen_context { + /* + * This is used to track register usage as well + * as calls to external helpers. + * - register usage is tracked with corresponding + * bits (r3-r31) + * - rest of the bits can be used to track other + * things -- for now, we use bits 0 to 2 + * encoded in SEEN_* macros above + */ + unsigned int seen; + unsigned int idx; + unsigned int stack_size; + int b2p[ARRAY_SIZE(b2p)]; +}; + +static inline void bpf_flush_icache(void *start, void *end) +{ + smp_wmb(); /* smp write barrier */ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) +{ + return ctx->seen & (1 << (31 - i)); +} + +static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen |= 1 << (31 - i); +} + +static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen &= ~(1 << (31 - i)); +} + +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass); +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); +void 
bpf_jit_realloc_regs(struct codegen_context *ctx); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h deleted file mode 100644 index 448dfd4d98e1..000000000000 --- a/arch/powerpc/net/bpf_jit32.h +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit32.h: BPF JIT compiler for PPC - * - * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation - * - * Split from bpf_jit.h - */ -#ifndef _BPF_JIT32_H -#define _BPF_JIT32_H - -#include <asm/asm-compat.h> -#include "bpf_jit.h" - -#ifdef CONFIG_PPC64 -#define BPF_PPC_STACK_R3_OFF 48 -#define BPF_PPC_STACK_LOCALS 32 -#define BPF_PPC_STACK_BASIC (48+64) -#define BPF_PPC_STACK_SAVE (18*8) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (48+64) -#else -#define BPF_PPC_STACK_R3_OFF 24 -#define BPF_PPC_STACK_LOCALS 16 -#define BPF_PPC_STACK_BASIC (24+32) -#define BPF_PPC_STACK_SAVE (18*4) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (24+32) -#endif - -#define REG_SZ (BITS_PER_LONG/8) - -/* - * Generated code register usage: - * - * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with: - * - * skb r3 (Entry parameter) - * A register r4 - * X register r5 - * addr param r6 - * r7-r10 scratch - * skb->data r14 - * skb headlen r15 (skb->len - skb->data_len) - * m[0] r16 - * m[...] ... 
- * m[15] r31 - */ -#define r_skb 3 -#define r_ret 3 -#define r_A 4 -#define r_X 5 -#define r_addr 6 -#define r_scratch1 7 -#define r_scratch2 8 -#define r_D 14 -#define r_HL 15 -#define r_M 16 - -#ifndef __ASSEMBLY__ - -/* - * Assembly helpers from arch/powerpc/net/bpf_jit.S: - */ -#define DECLARE_LOAD_FUNC(func) \ - extern u8 func[], func##_negative_offset[], func##_positive_offset[] - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); -DECLARE_LOAD_FUNC(sk_load_byte_msh); - -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0) - -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0) - -#ifdef CONFIG_PPC64 -#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) -#else -#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0) -#endif - -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC64 -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \ - PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ - } while (0) -#else -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \ - PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \ - } while(0) -#endif -#else -#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0) -#endif - -#define 
PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0) -#ifdef __LITTLE_ENDIAN__ -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) -#else -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) -#endif - -#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0) -#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0) - -#define SEEN_DATAREF 0x10000 /* might call external helpers */ -#define SEEN_XREG 0x20000 /* X reg is used */ -#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary - * storage */ -#define SEEN_MEM_MSK 0x0ffff - -struct codegen_context { - unsigned int seen; - unsigned int idx; - int pc_ret0; /* bpf index of first RET #0 instruction (if any) */ -}; - -#endif - -#endif diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 2e33c6673ff9..7b713edfa7e2 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -39,7 +39,7 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* BPF to ppc register mappings */ -static const int b2p[] = { +const int b2p[MAX_BPF_JIT_REG + 2] = { /* function return value */ [BPF_REG_0] = 8, /* function arguments */ @@ -86,25 +86,6 @@ static const int b2p[] = { } while(0) #define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) -#define SEEN_FUNC 0x1000 /* might call external helpers */ -#define SEEN_STACK 0x2000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x4000 /* uses tail calls */ - -struct codegen_context { - /* - * This is used to track register usage as well - * as calls to external helpers. 
- * - register usage is tracked with corresponding - * bits (r3-r10 and r27-r31) - * - rest of the bits can be used to track other - * things -- for now, we use bits 16 to 23 - * encoded in SEEN_* macros above - */ - unsigned int seen; - unsigned int idx; - unsigned int stack_size; -}; - #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S deleted file mode 100644 index 2f5030d8383f..000000000000 --- a/arch/powerpc/net/bpf_jit_asm.S +++ /dev/null @@ -1,226 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* bpf_jit.S: Packet/header access helper functions - * for PPC64 BPF compiler. - * - * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation - */ - -#include <asm/ppc_asm.h> -#include <asm/asm-compat.h> -#include "bpf_jit32.h" - -/* - * All of these routines are called directly from generated code, - * whose register usage is: - * - * r3 skb - * r4,r5 A,X - * r6 *** address parameter to helper *** - * r7-r10 scratch - * r14 skb->data - * r15 skb headlen - * r16-31 M[] - */ - -/* - * To consider: These helpers are so small it could be better to just - * generate them inline. Inline code can do the simple headlen check - * then branch directly to slow_path_XXX if required. (In fact, could - * load a spare GPR with the address of slow_path_generic and pass size - * as an argument, making the call site a mtlr, li and bllr.) - */ - .globl sk_load_word -sk_load_word: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_word_neg - .globl sk_load_word_positive_offset -sk_load_word_positive_offset: - /* Are we accessing past headlen? */ - subi r_scratch1, r_HL, 4 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_word - /* Nope, just hitting the header. cr0 here is eq or gt! 
*/ -#ifdef __LITTLE_ENDIAN__ - lwbrx r_A, r_D, r_addr -#else - lwzx r_A, r_D, r_addr -#endif - blr /* Return success, cr0 != LT */ - - .globl sk_load_half -sk_load_half: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_half_neg - .globl sk_load_half_positive_offset -sk_load_half_positive_offset: - subi r_scratch1, r_HL, 2 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_half -#ifdef __LITTLE_ENDIAN__ - lhbrx r_A, r_D, r_addr -#else - lhzx r_A, r_D, r_addr -#endif - blr - - .globl sk_load_byte -sk_load_byte: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_neg - .globl sk_load_byte_positive_offset -sk_load_byte_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte - lbzx r_A, r_D, r_addr - blr - -/* - * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf) - * r_addr is the offset value - */ - .globl sk_load_byte_msh -sk_load_byte_msh: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_msh_neg - .globl sk_load_byte_msh_positive_offset -sk_load_byte_msh_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte_msh - lbzx r_X, r_D, r_addr - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to skb_copy_bits: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. 
- */ -#define bpf_slow_path_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - addi r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r6, SIZE; \ - bl skb_copy_bits; \ - nop; \ - /* R3 = 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPI r3, 0; \ - blt bpf_error; /* cr0 = LT */ \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word: - bpf_slow_path_common(4) - /* Data value is on stack, and cr0 != LT */ - lwz r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1) - blr - -bpf_slow_path_half: - bpf_slow_path_common(2) - lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte: - bpf_slow_path_common(1) - lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte_msh: - bpf_slow_path_common(1) - lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to bpf_internal_load_pointer_neg_helper: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. 
- */ -#define sk_negative_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r5, SIZE; \ - bl bpf_internal_load_pointer_neg_helper; \ - nop; \ - /* R3 != 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPLI r3, 0; \ - beq bpf_error_slow; /* cr0 = EQ */ \ - mr r_addr, r3; \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_word_negative_offset -sk_load_word_negative_offset: - sk_negative_common(4) - lwz r_A, 0(r_addr) - blr - -bpf_slow_path_half_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_half_negative_offset -sk_load_half_negative_offset: - sk_negative_common(2) - lhz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_negative_offset -sk_load_byte_negative_offset: - sk_negative_common(1) - lbz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_msh_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_msh_negative_offset -sk_load_byte_msh_negative_offset: - sk_negative_common(1) - lbz r_X, 0(r_addr) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - 
-bpf_error_slow: - /* fabricate a cr0 = lt */ - li r_scratch1, -1 - PPC_LCMPI r_scratch1, 0 -bpf_error: - /* Entered with cr0 = lt */ - li r3, 0 - /* Generated code will 'blt epilogue', returning 0. */ - blr diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index e809cb5a1631..798ac4350a82 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only -/* bpf_jit_comp.c: BPF JIT compiler +/* + * eBPF JIT compiler * - * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation + * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> + * IBM Corporation * - * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com) - * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org> + * Based on the powerpc classic BPF JIT compiler by Matt Evans */ #include <linux/moduleloader.h> #include <asm/cacheflush.h> @@ -12,639 +13,204 @@ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/if_vlan.h> +#include <asm/kprobes.h> +#include <linux/bpf.h> -#include "bpf_jit32.h" +#include "bpf_jit.h" -static inline void bpf_flush_icache(void *start, void *end) +static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); + memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx) +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) { - int i; - const struct sock_filter *filter = fp->insns; - - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - /* Make stackframe */ - if (ctx->seen & SEEN_DATAREF) { - /* If we call any helpers (for loads), save LR */ - EMIT(PPC_INST_MFLR | __PPC_RT(R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); - - /* Back up non-volatile 
regs. */ - PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* - * Conditionally save regs r15-r31 as some will be used - * for M[] data. - */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_STL(i, 1, -(REG_SZ*(32-i))); - } - } - PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); - } - - if (ctx->seen & SEEN_DATAREF) { - /* - * If this filter needs to access skb data, - * prepare r_D and r_HL: - * r_HL = skb->len - skb->data_len - * r_D = skb->data - */ - PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - data_len)); - PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); - EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1)); - PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); - } + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; - if (ctx->seen & SEEN_XREG) { + for (i = 0; i < fp->len; i++) { /* - * TODO: Could also detect whether first instr. sets X and - * avoid this (as below, with A). + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can be left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * is of fixed length by padding them with NOPs. 
*/ - EMIT(PPC_RAW_LI(r_X, 0)); - } - - /* make sure we dont leak kernel information to user */ - if (bpf_needs_clear_a(&filter[0])) - EMIT(PPC_RAW_LI(r_A, 0)); -} - -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - int i; - - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME)); - if (ctx->seen & SEEN_DATAREF) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - EMIT(PPC_RAW_MTLR(0)); - PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* Restore any saved non-vol registers */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_LL(i, 1, -(REG_SZ*(32-i))); - } - } - } - /* The RETs have left a return value in R3. */ - - EMIT(PPC_RAW_BLR()); -} - -#define CHOOSE_LOAD_FUNC(K, func) \ - ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) - -/* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - unsigned int *addrs) -{ - const struct sock_filter *filter = fp->insns; - int flen = fp->len; - u8 *func; - unsigned int true_cond; - int i; - - /* Start of epilogue code */ - unsigned int exit_addr = addrs[flen]; - - for (i = 0; i < flen; i++) { - unsigned int K = filter[i].k; - u16 code = bpf_anc_helper(&filter[i]); + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; - /* - * addrs[] maps a BPF bytecode address into a real offset from - * the start of the body code. 
- */ - addrs[i] = ctx->idx * 4; - - switch (code) { - /*** ALU ops ***/ - case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_ADD(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K))); - break; - case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SUB(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K))); - break; - case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MULW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ - if (K < 32768) - EMIT(PPC_RAW_MULI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ - case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPWI(r_X, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ALU | BPF_MOD | BPF_X)) { - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X)); - EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - } else { - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X)); - } - break; - case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ - PPC_LI32(r_scratch2, K); - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2)); - EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ - if (K == 1) - break; - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_AND | BPF_X: - ctx->seen |= 
SEEN_XREG; - EMIT(PPC_RAW_AND(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_AND | BPF_K: - if (!IMM_H(K)) - EMIT(PPC_RAW_ANDI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_OR | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_OR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_OR | BPF_K: - if (IMM_L(K)) - EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: - case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_XOR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ - if (IMM_L(K)) - EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SLW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_LSH | BPF_K: - if (K == 0) - break; - else - EMIT(PPC_RAW_SLWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SRW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ - if (K == 0) - break; - else - EMIT(PPC_RAW_SRWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_NEG: - EMIT(PPC_RAW_NEG(r_A, r_A)); - break; - case BPF_RET | BPF_K: - PPC_LI32(r_ret, K); - if (!K) { - if (ctx->pc_ret0 == -1) - ctx->pc_ret0 = i; - } - /* - * If this isn't the very last instruction, branch to - * the epilogue if we've stuff to clean up. Otherwise, - * if there's nothing to tidy, just return. If we /are/ - * the last instruction, we're about to fall through to - * the epilogue to return. - */ - if (i != flen - 1) { - /* - * Note: 'seen' is properly valid only on pass - * #2. Both parts of this conditional are the - * same instruction size though, meaning the - * first pass will still correctly determine the - * code size/addresses. 
- */ - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_RET | BPF_A: - EMIT(PPC_RAW_MR(r_ret, r_A)); - if (i != flen - 1) { - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_MISC | BPF_TAX: /* X = A */ - EMIT(PPC_RAW_MR(r_X, r_A)); - break; - case BPF_MISC | BPF_TXA: /* A = X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MR(r_A, r_X)); - break; - - /*** Constant loads/M[] access ***/ - case BPF_LD | BPF_IMM: /* A = K */ - PPC_LI32(r_A, K); - break; - case BPF_LDX | BPF_IMM: /* X = K */ - PPC_LI32(r_X, K); - break; - case BPF_LD | BPF_MEM: /* A = mem[K] */ - EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LDX | BPF_MEM: /* X = mem[K] */ - EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_ST: /* mem[K] = A */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A)); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_STX: /* mem[K] = X */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X)); - ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); - break; - case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ - PPC_LWZ_OFFS(r_A, r_skb, K); - break; - case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ - PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); - break; - - /*** Ancillary info loads ***/ - case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, - protocol) != 2); - PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, - protocol)); - break; - case BPF_ANC | SKF_AD_IFINDEX: - case BPF_ANC | SKF_AD_HATYPE: - BUILD_BUG_ON(sizeof_field(struct net_device, - ifindex) != 4); - BUILD_BUG_ON(sizeof_field(struct net_device, - type) != 2); - 
PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - dev)); - EMIT(PPC_RAW_CMPDI(r_scratch1, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - /* Exit, returning 0; first pass hits here. */ - PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ANC | SKF_AD_IFINDEX)) { - PPC_LWZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, ifindex)); - } else { - PPC_LHZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, type)); - } - - break; - case BPF_ANC | SKF_AD_MARK: - BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - mark)); - break; - case BPF_ANC | SKF_AD_RXHASH: - BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - hash)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2); - - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - vlan_tci)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); - if (PKT_VLAN_PRESENT_BIT) - EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT)); - if (PKT_VLAN_PRESENT_BIT < 7) - EMIT(PPC_RAW_ANDI(r_A, r_A, 1)); - break; - case BPF_ANC | SKF_AD_QUEUE: - BUILD_BUG_ON(sizeof_field(struct sk_buff, - queue_mapping) != 2); - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - queue_mapping)); - break; - case BPF_ANC | SKF_AD_PKTTYPE: - PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); - EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX)); - EMIT(PPC_RAW_SRWI(r_A, r_A, 5)); - break; - case BPF_ANC | SKF_AD_CPU: - PPC_BPF_LOAD_CPU(r_A); - break; - /*** Absolute loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); - goto common_load; - case BPF_LD | BPF_H | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); - goto common_load; - case BPF_LD | BPF_B | BPF_ABS: - func = 
CHOOSE_LOAD_FUNC(K, sk_load_byte); - common_load: - /* Load from [K]. */ - ctx->seen |= SEEN_DATAREF; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - PPC_LI32(r_addr, K); - EMIT(PPC_RAW_BLRL()); /* - * Helper returns 'lt' condition on error, and an - * appropriate return value in r3 + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. */ - PPC_BCC(COND_LT, exit_addr); - break; - - /*** Indirect loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_IND: - func = sk_load_word; - goto common_load_ind; - case BPF_LD | BPF_H | BPF_IND: - func = sk_load_half; - goto common_load_ind; - case BPF_LD | BPF_B | BPF_IND: - func = sk_load_byte; - common_load_ind: + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + /* - * Load from [X + K]. Negative offsets are tested for - * in the helper functions. 
- */ - ctx->seen |= SEEN_DATAREF | SEEN_XREG; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K))); - EMIT(PPC_RAW_BLRL()); - /* If error, cr0.LT set */ - PPC_BCC(COND_LT, exit_addr); - break; - - case BPF_LDX | BPF_B | BPF_MSH: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); - goto common_load; - break; - - /*** Jump and branches ***/ - case BPF_JMP | BPF_JA: - if (K != 0) - PPC_JMP(addrs[i + 1 + K]); - break; - - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGT | BPF_X: - true_cond = COND_GT; - goto cond_branch; - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JGE | BPF_X: - true_cond = COND_GE; - goto cond_branch; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JEQ | BPF_X: - true_cond = COND_EQ; - goto cond_branch; - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP | BPF_JSET | BPF_X: - true_cond = COND_NE; - cond_branch: - /* same targets, can avoid doing the test :) */ - if (filter[i].jt == filter[i].jf) { - if (filter[i].jt > 0) - PPC_JMP(addrs[i + 1 + filter[i].jt]); - break; - } - - switch (code) { - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JEQ | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPLW(r_A, r_X)); - break; - case BPF_JMP | BPF_JSET | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X)); - break; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGE | BPF_K: - if (K < 32768) - EMIT(PPC_RAW_CMPLWI(r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_CMPLW(r_A, r_scratch1)); - } - break; - case BPF_JMP | BPF_JSET | BPF_K: - if (K < 32768) - /* PPC_ANDI is /only/ dot-form */ - EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, - r_scratch1)); - } - break; - } - /* Sometimes branches are constructed "backward", with - * 
the false path being the branch and true path being - * a fallthrough to the next instruction. + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. */ - if (filter[i].jt == 0) - /* Swap the sense of the branch */ - PPC_BCC(true_cond ^ COND_CMP_TRUE, - addrs[i + 1 + filter[i].jf]); - else { - PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]); - if (filter[i].jf != 0) - PPC_JMP(addrs[i + 1 + filter[i].jf]); - } - break; - default: - /* The filter contains something cruel & unusual. - * We don't handle it, but also there shouldn't be - * anything missing from our list. - */ - if (printk_ratelimit()) - pr_err("BPF filter opcode %04x (@%d) unsupported\n", - filter[i].code, i); - return -ENOTSUPP; + ctx->idx = tmp_idx; } - } - /* Set end-of-body-code address for exit. */ - addrs[i] = ctx->idx * 4; return 0; } -void bpf_jit_compile(struct bpf_prog *fp) +struct powerpc64_jit_data { + struct bpf_binary_header *header; + u32 *addrs; + u8 *image; + u32 proglen; + struct codegen_context ctx; +}; + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { - unsigned int proglen; - unsigned int alloclen; - u32 *image = NULL; + u32 proglen; + u32 alloclen; + u8 *image = NULL; u32 *code_base; - unsigned int *addrs; + u32 *addrs; + struct powerpc64_jit_data *jit_data; struct codegen_context cgctx; int pass; - int flen = fp->len; + int flen; + struct bpf_binary_header *bpf_hdr; + struct bpf_prog *org_fp = fp; + struct bpf_prog *tmp_fp; + bool bpf_blinded = false; + bool extra_pass = false; + + if (!fp->jit_requested) + return org_fp; + + tmp_fp = bpf_jit_blind_constants(org_fp); + if (IS_ERR(tmp_fp)) + return org_fp; + + if (tmp_fp != org_fp) { + bpf_blinded = true; + fp = tmp_fp; + } + + jit_data = fp->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + fp = org_fp; + goto out; + } + fp->aux->jit_data = jit_data; + } - if 
(!bpf_jit_enable) - return; + flen = fp->len; + addrs = jit_data->addrs; + if (addrs) { + cgctx = jit_data->ctx; + image = jit_data->image; + bpf_hdr = jit_data->header; + proglen = jit_data->proglen; + alloclen = proglen + FUNCTION_DESCR_SIZE; + extra_pass = true; + goto skip_init_ctx; + } addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) - return; + if (addrs == NULL) { + fp = org_fp; + goto out_addrs; + } - /* - * There are multiple assembly passes as the generated code will change - * size as it settles down, figuring out the max branch offsets/exit - * paths required. - * - * The range of standard conditional branches is +/- 32Kbytes. Since - * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to - * finish with 8 bytes/instruction. Not feasible, so long jumps are - * used, distinct from short branches. - * - * Current: - * - * For now, both branch types assemble to 2 words (short branches padded - * with a NOP); this is less efficient, but assembly will always complete - * after exactly 3 passes: - * - * First pass: No code buffer; Program is "faux-generated" -- no code - * emitted but maximum size of output determined (and addrs[] filled - * in). Also, we note whether we use M[], whether we use skb data, etc. - * All generation choices assumed to be 'worst-case', e.g. branches all - * far (2 instructions), return path code reduction not available, etc. - * - * Second pass: Code buffer allocated with size determined previously. - * Prologue generated to support features we have seen used. Exit paths - * determined and addrs[] is filled in again, as code may be slightly - * smaller as a result. - * - * Third pass: Code generated 'for real', and branch destinations - * determined from now-accurate addrs[] map. - * - * Ideal: - * - * If we optimise this, near branches will be shorter. On the - * first assembly pass, we should err on the side of caution and - * generate the biggest code. 
On subsequent passes, branches will be - * generated short or long and code size will reduce. With smaller - * code, more branches may fall into the short category, and code will - * reduce more. - * - * Finally, if we see one pass generate code the same size as the - * previous pass we have converged and should now generate code for - * real. Allocating at the end will also save the memory that would - * otherwise be wasted by the (small) current code shrinkage. - * Preferably, we should do a small number of passes (e.g. 5) and if we - * haven't converged by then, get impatient and force code to generate - * as-is, even if the odd branch would be left long. The chances of a - * long jump are tiny with all but the most enormous of BPF filter - * inputs, so we should usually converge on the third pass. - */ + memset(&cgctx, 0, sizeof(struct codegen_context)); + memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + + /* Make sure that the stack is quadword aligned. */ + cgctx.stack_size = round_up(fp->aux->stack_depth, 16); - cgctx.idx = 0; - cgctx.seen = 0; - cgctx.pc_ret0 = -1; /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { /* We hit something illegal or unsupported. */ - goto out; + fp = org_fp; + goto out_addrs; + } + + /* + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + bpf_jit_realloc_regs(&cgctx); /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can * calculate total size from idx. 
*/ - bpf_jit_build_prologue(fp, 0, &cgctx); + bpf_jit_build_prologue(0, &cgctx); bpf_jit_build_epilogue(0, &cgctx); proglen = cgctx.idx * 4; alloclen = proglen + FUNCTION_DESCR_SIZE; - image = module_alloc(alloclen); - if (!image) - goto out; - code_base = image + (FUNCTION_DESCR_SIZE/4); + bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); + if (!bpf_hdr) { + fp = org_fp; + goto out_addrs; + } + +skip_init_ctx: + code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; - bpf_jit_build_prologue(fp, code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs); + bpf_jit_build_prologue(code_base, &cgctx); + bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); bpf_jit_build_epilogue(code_base, &cgctx); if (bpf_jit_enable > 1) @@ -652,15 +218,15 @@ void bpf_jit_compile(struct bpf_prog *fp) proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) - /* Note that we output the base address of the code_base + /* + * Note that we output the base address of the code_base * rather than image, since opcodes are in code_base. 
*/ bpf_jit_dump(flen, proglen, pass, code_base); - bpf_flush_icache(code_base, code_base + (proglen/4)); - -#ifdef CONFIG_PPC64 +#ifdef PPC64_ELF_ABI_v1 /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; @@ -668,16 +234,38 @@ void bpf_jit_compile(struct bpf_prog *fp) fp->bpf_func = (void *)image; fp->jited = 1; + fp->jited_len = alloclen; + + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, addrs); +out_addrs: + kfree(addrs); + kfree(jit_data); + fp->aux->jit_data = NULL; + } else { + jit_data->addrs = addrs; + jit_data->ctx = cgctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = bpf_hdr; + } out: - kfree(addrs); - return; + if (bpf_blinded) + bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); + + return fp; } +/* Overriding bpf_jit_free() as we don't set images read-only. */ void bpf_jit_free(struct bpf_prog *fp) { + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *bpf_hdr = (void *)addr; + if (fp->jited) - module_memfree(fp->bpf_func); + bpf_jit_binary_free(bpf_hdr); bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..bbb16099e8c7 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -0,0 +1,1100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * eBPF JIT compiler for PPC32 + * + * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu> + * CS GROUP France + * + * Based on PPC64 eBPF JIT compiler by Naveen N. 
Rao + */ +#include <linux/moduleloader.h> +#include <asm/cacheflush.h> +#include <asm/asm-compat.h> +#include <linux/netdevice.h> +#include <linux/filter.h> +#include <linux/if_vlan.h> +#include <asm/kprobes.h> +#include <linux/bpf.h> + +#include "bpf_jit.h" + +/* + * Stack layout: + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 16 * 4 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 16 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers r17 to r31 (14) + tail call */ +#define BPF_PPC_STACK_SAVE (15 * 4 + 4) +/* stack frame, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) + +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +const int b2p[MAX_BPF_JIT_REG + 1] = { + /* function return value */ + [BPF_REG_0] = 12, + /* function arguments */ + [BPF_REG_1] = 4, + [BPF_REG_2] = 6, + [BPF_REG_3] = 8, + [BPF_REG_4] = 10, + [BPF_REG_5] = 22, + /* non volatile registers */ + [BPF_REG_6] = 24, + [BPF_REG_7] = 26, + [BPF_REG_8] = 28, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 18, + /* eBPF jit internal registers */ + [BPF_REG_AX] = 20, + [TMP_REG] = 31, /* 32 bits */ +}; + +static int bpf_to_ppc(struct codegen_context *ctx, int reg) +{ + return ctx->b2p[reg]; +} + +/* PPC NVR range -- update this if we ever use NVRs below r17 */ +#define BPF_PPC_NVR_MIN 17 +#define BPF_PPC_TC 16 + +static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) +{ + if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) + return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg); + + WARN(true, "BPF JIT is asking about unknown registers, will crash the stack"); + /* Use the hole we have left for alignment */ + return BPF_PPC_STACKFRAME(ctx) - 4; +} + +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ + if (ctx->seen & SEEN_FUNC) + return; 
+ + while (ctx->seen & SEEN_NVREG_MASK && + (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { + int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); + int i; + + for (i = BPF_REG_0; i <= TMP_REG; i++) { + if (ctx->b2p[i] != old) + continue; + ctx->b2p[i] = new; + bpf_set_seen_register(ctx, new); + bpf_clear_seen_register(ctx, old); + if (i != TMP_REG) { + bpf_set_seen_register(ctx, new - 1); + bpf_clear_seen_register(ctx, old - 1); + } + break; + } + } +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + + /* + * Initialize tail_call_cnt in stack frame if we do tail calls. + * Otherwise, put in NOPs so that it can be skipped when we are + * invoked through a tail call. 
+ */ + if (ctx->seen & SEEN_TAILCALL) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + } else { + EMIT(PPC_RAW_NOP()); + } + +#define BPF_TAILCALL_PROLOGUE_SIZE 16 + + /* + * We need a stack frame, but we don't necessarily need to + * save/restore LR unless we call other functions + */ + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MFLR(__REG_R0)); + + /* + * Back up non-volatile regs -- registers r18-r31 + */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + + /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12); + } + + /* Setup frame pointer to point to the bpf stack area */ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1, + STACK_FRAME_MIN_SIZE + ctx->stack_size)); + } + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); +} + +static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* Restore NVRs */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + + /* Tear down our stack frame */ + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx))); + 
+ if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_BLR()); +} + +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +{ + s32 rel = (s32)func - (s32)(image + ctx->idx); + + if (image && rel < 0x2000000 && rel >= -0x2000000) { + PPC_BL_ABS(func); + } else { + /* Load function address into r0 */ + EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); + EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_BLRL()); + } +} + +static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +{ + /* + * By now, the eBPF program has already setup parameters in r3-r6 + * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program + * r5-r6/BPF_REG_2 - pointer to bpf_array + * r7-r8/BPF_REG_3 - index in bpf_array + */ + int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); + int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + + /* + * if (index >= array->map.max_entries) + * goto out; + */ + EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0)); + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + PPC_BCC(COND_GE, out); + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT)); + /* tail_call_cnt++; */ + EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1)); + PPC_BCC(COND_GT, out); + + /* prog = array->ptrs[index]; */ + EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29)); + EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array)); + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + + /* + * if (prog == NULL) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + PPC_BCC(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, 
bpf_func))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_MTCTR(__REG_R3)); + + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + + /* tear restore NVRs, ... */ + bpf_jit_emit_common_epilogue(image, ctx); + + EMIT(PPC_RAW_BCTR()); + /* out: */ +} + +/* Assemble the body code between the prologue & epilogue */ +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) +{ + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; + int i, ret; + + /* Start of epilogue code - will only be valid 2nd pass onwards */ + u32 exit_addr = addrs[flen]; + + for (i = 0; i < flen; i++) { + u32 code = insn[i].code; + u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg_h = dst_reg - 1; + u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg_h = src_reg - 1; + u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + s16 off = insn[i].off; + s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; + u32 true_cond; + + /* + * addrs[] maps a BPF bytecode address into a real offset from + * the start of the body code. + */ + addrs[i] = ctx->idx * 4; + + /* + * As an optimization, we note down which registers + * are used so that we can only save/restore those in our + * prologue and epilogue. We do this here regardless of whether + * the actual BPF instruction uses src/dst registers or not + * (for instance, BPF_CALL does not use them). The expectation + * is that those instructions will have src_reg/dst_reg set to + * 0. Even otherwise, we just lose some prologue/epilogue + * optimization but everything else should work without + * any issues. 
+ */ + if (dst_reg >= 3 && dst_reg < 32) { + bpf_set_seen_register(ctx, dst_reg); + bpf_set_seen_register(ctx, dst_reg_h); + } + + if (src_reg >= 3 && src_reg < 32) { + bpf_set_seen_register(ctx, src_reg); + bpf_set_seen_register(ctx, src_reg_h); + } + + switch (code) { + /* + * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG + */ + case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ + EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg)); + EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + imm = -imm; + fallthrough; + case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ + if (IMM_HA(imm) & 0xffff) + EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm))); + if (IMM_L(imm)) + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + break; + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ + imm = -imm; + fallthrough; + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) + break; + + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + } + if (imm >= 0) + EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); + else + EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h)); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + 
EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ + if (!imm) { + PPC_LI32(dst_reg, 0); + PPC_LI32(dst_reg_h, 0); + break; + } + if (imm == 1) + break; + if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + } + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); + if (imm < 0) + EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); + EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + break; + case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ + return -EOPNOTSUPP; + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ + return -EOPNOTSUPP; + case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ + if (!imm) + return -EINVAL; + if (imm == 1) + break; + + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ + if (!imm) + return -EINVAL; + + if (!is_power_of_2((u32)imm)) { + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, 
imm); + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + } + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31)); + + break; + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ + if (!imm) + return -EINVAL; + if (imm < 0) + imm = -imm; + if (!is_power_of_2(imm)) + return -EOPNOTSUPP; + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ + if (!imm) + return -EINVAL; + if (!is_power_of_2(abs(imm))) + return -EOPNOTSUPP; + + if (imm < 0) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + imm = -imm; + } + if (imm == 1) + break; + imm = ilog2(imm); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); + break; + case BPF_ALU64 | BPF_NEG: /* dst = -dst */ + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + + /* + * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH + */ + case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ + if (imm >= 0) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + fallthrough; + case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ + if (!IMM_H(imm)) { + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); + } else if 
(!IMM_L(imm)) { + EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm))); + } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(i) - 1)) - 1))) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, + 32 - fls(imm), 32 - ffs(imm))); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ + /* Sign-extended */ + if (imm < 0) + EMIT(PPC_RAW_LI(dst_reg_h, -1)); + fallthrough; + case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ + if (dst_reg == src_reg) { + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else { + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h)); + } + break; + case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ + if (dst_reg == src_reg) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ + if (imm < 0) + EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h)); + fallthrough; + case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ + bpf_set_seen_register(ctx, tmp_reg); + 
EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm)); + else + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, 
dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31)); + else + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26)); + EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); + break; + case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32)); + else + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31)); + break; + + /* + * MOV + */ + case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (dst_reg == src_reg) + break; + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ + /* special mov32 for zext */ + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else if (dst_reg != 
src_reg) + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ + PPC_LI32(dst_reg, imm); + PPC_EX32(dst_reg_h, imm); + break; + case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ + PPC_LI32(dst_reg, imm); + break; + + /* + * BPF_FROM_BE/LE + */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* Copy 16 bits to upper part */ + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15)); + /* Rotate 8 bits right & mask */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31)); + break; + case 32: + /* + * Rotate word left by 8 bits: + * 2 bytes are already in their final position + * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) + */ + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + break; + case 64: + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 32 bits */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31)); + break; + case 32: + case 64: + /* nop */ + break; + } + break; + + /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ + 
PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); + break; + case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + + /* + * BPF_STX XADD (atomic_add) + */ + case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ + bpf_set_seen_register(ctx, tmp_reg); + /* Get offset into TMP_REG */ + EMIT(PPC_RAW_LI(tmp_reg, off)); + /* load value from memory into r0 */ + EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + /* add value from src_reg into this */ + EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + /* store result back */ + EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + /* we're done if this succeeded */ + PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + break; + + case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ + return -EOPNOTSUPP; + + /* + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + if 
(!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + + /* + * Doubleword load + * 16 byte instruction that uses two 'struct bpf_insn' + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); + PPC_LI32(dst_reg, (u32)insn[i].imm); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; + break; + + /* + * Return/Exit + */ + case BPF_JMP | BPF_EXIT: + /* + * If this isn't the very last instruction, branch to + * the epilogue. If we _are_ the last instruction, + * we'll just fall through to the epilogue. + */ + if (i != flen - 1) + PPC_JMP(exit_addr); + /* else fall through to the epilogue */ + break; + + /* + * Call kernel helper or bpf function + */ + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + } + + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + break; + + /* + * Jumps and branches + */ + case BPF_JMP | BPF_JA: + PPC_JMP(addrs[i + 1 + off]); + break; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | 
BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + true_cond = COND_GT; + goto cond_branch; + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + true_cond = COND_LT; + goto cond_branch; + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + true_cond = COND_GE; + goto cond_branch; + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + true_cond = COND_LE; + goto cond_branch; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + true_cond = COND_EQ; + goto cond_branch; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + true_cond = COND_NE; + goto cond_branch; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + true_cond = COND_NE; + /* fallthrough; */ + +cond_branch: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + 
EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + /* signed comparison */ + EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + /* signed comparison */ + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSET | BPF_X: + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSET | BPF_X: { + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + /* + * Need sign-extended load, so only positive + * values can be used as imm in cmplwi + */ + if (imm >= 0 && imm < 32768) { + EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load ... 
but unsigned comparison */ + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load */ + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + /* + * signed comparison, so any 16-bit value + * can be used in cmpwi + */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); + } else { + /* sign-extending load */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= 0 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + if (imm < 0) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + } + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | 
BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= -32768 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + } + PPC_BCC(true_cond, addrs[i + 1 + off]); + break; + + /* + * Tail call + */ + case BPF_JMP | BPF_TAIL_CALL: + ctx->seen |= SEEN_TAILCALL; + bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + break; + + default: + /* + * The filter contains something cruel & unusual. + * We don't handle it, but also there shouldn't be + * anything missing from our list. + */ + pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i); + return -EOPNOTSUPP; + } + if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext && + !insn_is_zext(&insn[i + 1])) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } + + /* Set end-of-body-code address for exit. */ + addrs[i] = ctx->idx * 4; + + return 0; +} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index aaf1a887f653..57a8c1153851 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -18,27 +18,6 @@ #include "bpf_jit64.h" -static void bpf_jit_fill_ill_insns(void *area, unsigned int size) -{ - memset32(area, BREAKPOINT_INSTRUCTION, size/4); -} - -static inline void bpf_flush_icache(void *start, void *end) -{ - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); -} - -static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) -{ - return (ctx->seen & (1 << (31 - b2p[i]))); -} - -static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) -{ - ctx->seen |= (1 << (31 - b2p[i])); -} - static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { /* @@ -47,7 +26,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from 
the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); } /* @@ -85,7 +64,11 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } -static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -124,11 +107,11 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, BPF_REG_FP)) + if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -139,7 +122,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Tear down our stack frame */ @@ -152,7 +135,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx } } -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { bpf_jit_emit_common_epilogue(image, ctx); @@ -187,8 +170,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, EMIT(PPC_RAW_BLRL()); } -static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, - u64 func) +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 
func) { unsigned int i, ctx_idx = ctx->idx; @@ -289,9 +271,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 } /* Assemble the body code between the prologue & epilogue */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - u32 *addrs, bool extra_pass) +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -330,9 +311,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, * any issues. */ if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32) - bpf_set_seen_register(ctx, insn[i].dst_reg); + bpf_set_seen_register(ctx, dst_reg); if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32) - bpf_set_seen_register(ctx, insn[i].src_reg); + bpf_set_seen_register(ctx, src_reg); switch (code) { /* @@ -1026,249 +1007,3 @@ cond_branch: return 0; } - -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) -{ - const struct bpf_insn *insn = fp->insnsi; - bool func_addr_fixed; - u64 func_addr; - u32 tmp_idx; - int i, ret; - - for (i = 0; i < fp->len; i++) { - /* - * During the extra pass, only the branch target addresses for - * the subprog calls need to be fixed. All other instructions - * can left untouched. - * - * The JITed image length does not change because we already - * ensure that the JITed instruction sequence for these calls - * are of fixed length by padding them with NOPs. 
- */ - if (insn[i].code == (BPF_JMP | BPF_CALL) && - insn[i].src_reg == BPF_PSEUDO_CALL) { - ret = bpf_jit_get_func_addr(fp, &insn[i], true, - &func_addr, - &func_addr_fixed); - if (ret < 0) - return ret; - - /* - * Save ctx->idx as this would currently point to the - * end of the JITed image and set it to the offset of - * the instruction sequence corresponding to the - * subprog call temporarily. - */ - tmp_idx = ctx->idx; - ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); - - /* - * Restore ctx->idx here. This is safe as the length - * of the JITed sequence remains unchanged. - */ - ctx->idx = tmp_idx; - } - } - - return 0; -} - -struct powerpc64_jit_data { - struct bpf_binary_header *header; - u32 *addrs; - u8 *image; - u32 proglen; - struct codegen_context ctx; -}; - -bool bpf_jit_needs_zext(void) -{ - return true; -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) -{ - u32 proglen; - u32 alloclen; - u8 *image = NULL; - u32 *code_base; - u32 *addrs; - struct powerpc64_jit_data *jit_data; - struct codegen_context cgctx; - int pass; - int flen; - struct bpf_binary_header *bpf_hdr; - struct bpf_prog *org_fp = fp; - struct bpf_prog *tmp_fp; - bool bpf_blinded = false; - bool extra_pass = false; - - if (!fp->jit_requested) - return org_fp; - - tmp_fp = bpf_jit_blind_constants(org_fp); - if (IS_ERR(tmp_fp)) - return org_fp; - - if (tmp_fp != org_fp) { - bpf_blinded = true; - fp = tmp_fp; - } - - jit_data = fp->aux->jit_data; - if (!jit_data) { - jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); - if (!jit_data) { - fp = org_fp; - goto out; - } - fp->aux->jit_data = jit_data; - } - - flen = fp->len; - addrs = jit_data->addrs; - if (addrs) { - cgctx = jit_data->ctx; - image = jit_data->image; - bpf_hdr = jit_data->header; - proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; - extra_pass = true; - goto skip_init_ctx; - } - - addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) { - 
fp = org_fp; - goto out_addrs; - } - - memset(&cgctx, 0, sizeof(struct codegen_context)); - - /* Make sure that the stack is quadword aligned. */ - cgctx.stack_size = round_up(fp->aux->stack_depth, 16); - - /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - /* We hit something illegal or unsupported. */ - fp = org_fp; - goto out_addrs; - } - - /* - * If we have seen a tail call, we need a second pass. - * This is because bpf_jit_emit_common_epilogue() is called - * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. - */ - if (cgctx.seen & SEEN_TAILCALL) { - cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - fp = org_fp; - goto out_addrs; - } - } - - /* - * Pretend to build prologue, given the features we've seen. This will - * update ctgtx.idx as it pretends to output instructions, then we can - * calculate total size from idx. - */ - bpf_jit_build_prologue(0, &cgctx); - bpf_jit_build_epilogue(0, &cgctx); - - proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; - - bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, - bpf_jit_fill_ill_insns); - if (!bpf_hdr) { - fp = org_fp; - goto out_addrs; - } - -skip_init_ctx: - code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); - - if (extra_pass) { - /* - * Do not touch the prologue and epilogue as they will remain - * unchanged. Only fix the branch target address for subprog - * calls in the body. - * - * This does not change the offsets and lengths of the subprog - * call instruction sequences and hence, the size of the JITed - * image as well. - */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); - - /* There is no need to perform the usual passes. */ - goto skip_codegen_passes; - } - - /* Code generation passes 1-2 */ - for (pass = 1; pass < 3; pass++) { - /* Now build the prologue, body code & epilogue for real. 
*/ - cgctx.idx = 0; - bpf_jit_build_prologue(code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); - bpf_jit_build_epilogue(code_base, &cgctx); - - if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, - proglen - (cgctx.idx * 4), cgctx.seen); - } - -skip_codegen_passes: - if (bpf_jit_enable > 1) - /* - * Note that we output the base address of the code_base - * rather than image, since opcodes are in code_base. - */ - bpf_jit_dump(flen, proglen, pass, code_base); - -#ifdef PPC64_ELF_ABI_v1 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; -#endif - - fp->bpf_func = (void *)image; - fp->jited = 1; - fp->jited_len = alloclen; - - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); - if (!fp->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(fp, addrs); -out_addrs: - kfree(addrs); - kfree(jit_data); - fp->aux->jit_data = NULL; - } else { - jit_data->addrs = addrs; - jit_data->ctx = cgctx; - jit_data->proglen = proglen; - jit_data->image = image; - jit_data->header = bpf_hdr; - } - -out: - if (bpf_blinded) - bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); - - return fp; -} - -/* Overriding bpf_jit_free() as we don't set images read-only. 
*/ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 766f064f00fb..16d4d1b6a1ff 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -17,6 +17,7 @@ #include <asm/firmware.h> #include <asm/ptrace.h> #include <asm/code-patching.h> +#include <asm/interrupt.h> #ifdef CONFIG_PPC64 #include "internal.h" @@ -168,7 +169,7 @@ static bool regs_use_siar(struct pt_regs *regs) * they have not been setup using perf_read_regs() and so regs->result * is something random. */ - return ((TRAP(regs) == 0xf00) && regs->result); + return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result); } /* @@ -347,7 +348,7 @@ static inline void perf_read_regs(struct pt_regs *regs) * hypervisor samples as well as samples in the kernel with * interrupts off hence the userspace check. */ - if (TRAP(regs) != 0xf00) + if (TRAP(regs) != INTERRUPT_PERFMON) use_siar = 0; else if ((ppmu->flags & PPMU_NO_SIAR)) use_siar = 0; @@ -1963,6 +1964,17 @@ static int power_pmu_event_init(struct perf_event *event) return -ENOENT; } + /* + * PMU config registers have fields that are + * reserved and some specific values for bit fields are reserved. + * For ex., MMCRA[61:62] is Random Sampling Mode (SM) + * and value of 0b11 to this field is reserved. + * Check for invalid values in attr.config. 
+ */ + if (ppmu->check_attr_config && + ppmu->check_attr_config(event)) + return -EINVAL; + event->hw.config_base = ev; event->hw.idx = 0; @@ -2206,9 +2218,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ppmu->get_mem_data_src) ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); - if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && + if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE && ppmu->get_mem_weight) - ppmu->get_mem_weight(&data.weight.full); + ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type); if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index e5eb33255066..1816f560a465 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -226,14 +226,14 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; -DEFINE_PER_CPU(int, hv_24x7_txn_flags); -DEFINE_PER_CPU(int, hv_24x7_txn_err); +static DEFINE_PER_CPU(int, hv_24x7_txn_flags); +static DEFINE_PER_CPU(int, hv_24x7_txn_err); struct hv_24x7_hw { struct perf_event *events[255]; }; -DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); +static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -241,8 +241,8 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); * the simplest way to ensure that. 
*/ #define H24x7_DATA_BUFFER_SIZE 4096 -DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); -DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); static unsigned int max_num_requests(int interface_version) { diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e4f577da33d8..f92bf5f6b74f 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -21,7 +21,7 @@ PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); PMU_FORMAT_ATTR(thresh_start, "config:36-39"); PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); -struct attribute *isa207_pmu_format_attr[] = { +static struct attribute *isa207_pmu_format_attr[] = { &format_attr_event.attr, &format_attr_pmcxsel.attr, &format_attr_mark.attr, @@ -275,17 +275,47 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, sier = mfspr(SPRN_SIER); val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; - if (val == 1 || val == 2) { - idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT; - sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT; + if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31))) + return; + + idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT; + sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT; + + dsrc->val = isa207_find_source(idx, sub_idx); + if (val == 7) { + u64 mmcra; + u32 op_type; - dsrc->val = isa207_find_source(idx, sub_idx); + /* + * Type 0b111 denotes either larx or stcx instruction. Use the + * MMCRA sampling bits [57:59] along with the type value + * to determine the exact instruction type. If the sampling + * criteria is neither load nor store, set the type as default + * to NA. 
+ */ + mmcra = mfspr(SPRN_MMCRA); + + op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK; + switch (op_type) { + case 5: + dsrc->val |= P(OP, LOAD); + break; + case 7: + dsrc->val |= P(OP, STORE); + break; + default: + dsrc->val |= P(OP, NA); + break; + } + } else { dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE); } } -void isa207_get_mem_weight(u64 *weight) +void isa207_get_mem_weight(u64 *weight, u64 type) { + union perf_sample_weight *weight_fields; + u64 weight_lat; u64 mmcra = mfspr(SPRN_MMCRA); u64 exp = MMCRA_THR_CTR_EXP(mmcra); u64 mantissa = MMCRA_THR_CTR_MANT(mmcra); @@ -295,10 +325,31 @@ void isa207_get_mem_weight(u64 *weight) if (cpu_has_feature(CPU_FTR_ARCH_31)) mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); - if (val == 0 || val == 7) - *weight = 0; + if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) + weight_lat = 0; else - *weight = mantissa << (2 * exp); + weight_lat = mantissa << (2 * exp); + + /* + * Use 64 bit weight field (full) if sample type is + * WEIGHT. + * + * if sample type is WEIGHT_STRUCT: + * - store memory latency in the lower 32 bits. + * - For ISA v3.1, use remaining two 16 bit fields of + * perf_sample_weight to store cycle counter values + * from sier2. + */ + weight_fields = (union perf_sample_weight *)weight; + if (type & PERF_SAMPLE_WEIGHT) + weight_fields->full = weight_lat; + else { + weight_fields->var1_dw = (u32)weight_lat; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2)); + weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2)); + } + } } int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1) @@ -447,8 +498,8 @@ ebb_bhrb: * EBB events are pinned & exclusive, so this should never actually * hit, but we leave it as a fallback in case. 
*/ - mask |= CNST_EBB_VAL(ebb); - value |= CNST_EBB_MASK; + mask |= CNST_EBB_MASK; + value |= CNST_EBB_VAL(ebb); *maskp = mask; *valp = value; @@ -694,3 +745,45 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, return num_alt; } + +int isa3XX_check_attr_config(struct perf_event *ev) +{ + u64 val, sample_mode; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + sample_mode = val & 0x3; + + /* + * MMCRA[61:62] is Random Sampling Mode (SM). + * value of 0b11 is reserved. + */ + if (sample_mode == 0x3) + return -EINVAL; + + /* + * Check for all reserved values + * Source: Performance Monitoring Unit User Guide + */ + switch (val) { + case 0x5: + case 0x9: + case 0xD: + case 0x19: + case 0x1D: + case 0x1A: + case 0x1E: + return -EINVAL; + } + + /* + * MMCRA[48:51]/[52:55] Threshold Start/Stop + * Events Selection. + * 0b11110000/0b00001111 is reserved. + */ + val = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK; + if (((val & 0xF0) == 0xF0) || ((val & 0xF) == 0xF)) + return -EINVAL; + + return 0; +} diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 1af0e8c97ac7..4a2cbc3dc047 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -220,6 +220,7 @@ /* Bits in MMCRA for PowerISA v2.07 */ #define MMCRA_SAMP_MODE_SHIFT 1 #define MMCRA_SAMP_ELIG_SHIFT 4 +#define MMCRA_SAMP_ELIG_MASK 7 #define MMCRA_THR_CTL_SHIFT 8 #define MMCRA_THR_SEL_SHIFT 16 #define MMCRA_THR_CMP_SHIFT 32 @@ -265,6 +266,10 @@ #define ISA207_SIER_DATA_SRC_SHIFT 53 #define ISA207_SIER_DATA_SRC_MASK (0x7ull << ISA207_SIER_DATA_SRC_SHIFT) +/* Bits in SIER2/SIER3 for Power10 */ +#define P10_SIER2_FINISH_CYC(sier2) (((sier2) >> (63 - 37)) & 0x7fful) +#define P10_SIER2_DISPATCH_CYC(sier2) (((sier2) >> (63 - 13)) & 0x7fful) + #define P(a, b) PERF_MEM_S(a, b) #define PH(a, b) (P(LVL, HIT) | P(a, b)) #define PM(a, b) (P(LVL, MISS) | P(a, b)) @@ -278,6 +283,8 @@ int 
isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); -void isa207_get_mem_weight(u64 *weight); +void isa207_get_mem_weight(u64 *weight, u64 type); + +int isa3XX_check_attr_config(struct perf_event *ev); #endif diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index e45dafe818ed..93be7197d250 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -75,5 +75,5 @@ EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); * thresh end (TE) */ -EVENT(MEM_LOADS, 0x34340401e0); -EVENT(MEM_STORES, 0x343c0401e0); +EVENT(MEM_LOADS, 0x35340401e0); +EVENT(MEM_STORES, 0x353c0401e0); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index a901c1348cad..f9d64c63bb4a 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -106,6 +106,18 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return num_alt; } +static int power10_check_attr_config(struct perf_event *ev) +{ + u64 val; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + if (val == 0x10 || isa3XX_check_attr_config(ev)) + return -EINVAL; + + return 0; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); @@ -559,6 +571,7 @@ static struct power_pmu power10_pmu = { .attr_groups = power10_pmu_attr_groups, .bhrb_nr = 32, .capabilities = PERF_PMU_CAP_EXTENDED_REGS, + .check_attr_config = power10_check_attr_config, }; int init_power10_pmu(void) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 2a57e93a79dc..ff3382140d7e 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -151,6 +151,18 @@ static int 
power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return num_alt; } +static int power9_check_attr_config(struct perf_event *ev) +{ + u64 val; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + if (val == 0xC || isa3XX_check_attr_config(ev)) + return -EINVAL; + + return 0; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); @@ -437,6 +449,7 @@ static struct power_pmu power9_pmu = { .attr_groups = power9_pmu_attr_groups, .bhrb_nr = 32, .capabilities = PERF_PMU_CAP_EXTENDED_REGS, + .check_attr_config = power9_check_attr_config, }; int init_power9_pmu(void) diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 7d41e9264510..83975ef50975 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -5,7 +5,7 @@ config PPC_47x select MPIC help This option enables support for the 47x family of processors and is - not currently compatible with other 44x or 46x varients + not currently compatible with other 44x or 46x variants config BAMBOO bool "Bamboo" diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 11475c58ea43..afee8b1515a8 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -181,7 +181,7 @@ sram_code: udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ - addi r12, r13, r12 /* end */ + add r12, r13, r12 /* end */ 1: mftb r13 /* current */ cmp cr0, r13, r12 diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3ce907523b1e..e4b05667686e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -101,6 +101,8 @@ config PPC_BOOK3S_64 select ARCH_SUPPORTS_NUMA_BALANCING 
select IRQ_WORK select PPC_MM_SLICES + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP config PPC_BOOK3E_64 bool "Embedded processors" @@ -306,6 +308,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) + select PPC_FPU help This option enables kernel support for the Altivec extensions to the PowerPC processor. The kernel currently supports saving and restoring @@ -363,8 +366,6 @@ config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 select ARCH_HAS_GIGANTIC_PAGE - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 2124831cf57c..fa08699aedeb 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -486,7 +486,8 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_size = size >> window->table.it_page_shift; window->table.it_ops = &cell_iommu_ops; - iommu_init_table(&window->table, iommu->nid, 0, 0); + if (!iommu_init_table(&window->table, iommu->nid, 0, 0)) + panic("Failed to initialize iommu table"); pr_debug("\tioid %d\n", window->ioid); pr_debug("\tblocksize %ld\n", window->table.it_blocksize); diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index abdef9bcf432..fe0d8797a00a 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,9 +35,9 @@ */ static void *spu_syscall_table[] = { +#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry) #define __SYSCALL(nr, entry) [nr] = entry, #include <asm/syscall_table_spu.h> -#undef __SYSCALL }; long spu_sys_callback(struct spu_syscall_block *s) diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 8c421dc78b28..76e6256cb0a7 100644 --- 
a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -131,8 +131,7 @@ static struct pci_ops rtas_pci_ops = volatile struct Hydra __iomem *Hydra = NULL; -int __init -hydra_init(void) +static int __init hydra_init(void) { struct device_node *np; struct resource r; diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig index c1920961f410..4c6d703a4284 100644 --- a/arch/powerpc/platforms/embedded6xx/Kconfig +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -71,11 +71,6 @@ config MPC10X_BRIDGE bool select PPC_INDIRECT_PCI -config MV64X60 - bool - select PPC_INDIRECT_PCI - select CHECK_CACHE_COHERENCY - config GAMECUBE_COMMON bool diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index a20b9576de22..37875e478b3a 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -34,7 +34,7 @@ static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; static int __init fixup_one_level_bus_range(struct device_node *node, int higher) { - for (; node != 0;node = node->sibling) { + for (; node; node = node->sibling) { const int *bus_range; const unsigned int *class_code; int len; diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index b500a6e47e6b..5be7242fbd86 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -146,7 +146,9 @@ static void iommu_table_iobmap_setup(void) */ iommu_table_iobmap.it_blocksize = 4; iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops; - iommu_init_table(&iommu_table_iobmap, 0, 0, 0); + if (!iommu_init_table(&iommu_table_iobmap, 0, 0, 0)) + panic("Failed to initialize iommu table"); + pr_debug(" <- %s\n", __func__); } diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 019669eb21d2..71c1262589fe 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ 
b/arch/powerpc/platforms/powernv/memtrace.c @@ -46,10 +46,26 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); } +static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct memtrace_entry *ent = filp->private_data; + + if (ent->size < vma->vm_end - vma->vm_start) + return -EINVAL; + + if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +} + static const struct file_operations memtrace_fops = { .llseek = default_llseek, .read = memtrace_read, .open = simple_open, + .mmap = memtrace_mmap, }; #define FLUSH_CHUNK_SIZE SZ_1G @@ -187,7 +203,7 @@ static int memtrace_init_debugfs(void) dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); ent->dir = dir; - debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); + debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops); debugfs_create_x64("start", 0400, dir, &ent->start); debugfs_create_x64("size", 0400, dir, &ent->size); } diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 0d9ba70f7251..5b9736bbc2aa 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -71,7 +71,7 @@ static LIST_HEAD(opalcore_list); static struct opalcore_config *oc_conf; static const struct opal_mpipl_fadump *opalc_metadata; static const struct opal_mpipl_fadump *opalc_cpu_metadata; -struct kobject *mpipl_kobj; +static struct kobject *mpipl_kobj; /* * Set crashing CPU's signal to SIGUSR1. 
if the kernel is triggered diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index deddaebf8c14..a191f4c60ce7 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -105,7 +105,6 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; pgprot_t page_prot; - int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", vma->vm_start, vma->vm_end, vma->vm_pgoff, @@ -121,10 +120,8 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); - rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, page_prot); - - return rc; } static bool opal_msg_queue_empty(void) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f0f901683a2f..66c3c3337334 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1762,7 +1762,8 @@ found: tbl->it_ops = &pnv_ioda1_iommu_ops; pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; - iommu_init_table(tbl, phb->hose->node, 0, 0); + if (!iommu_init_table(tbl, phb->hose->node, 0, 0)) + panic("Failed to initialize iommu table"); pe->dma_setup_done = true; return; @@ -1930,16 +1931,16 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift; res_end = min(window_size, SZ_4G) >> tbl->it_page_shift; } - iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end); - rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end)) + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + else + rc = -ENOMEM; if (rc) { - 
pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", - rc); + pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc); iommu_tce_table_put(tbl); - return rc; + tbl = NULL; /* This clears iommu_table_base below */ } - if (!pnv_iommu_bypass_disabled) pnv_pci_ioda2_set_bypass(pe, true); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index aadf932c4e61..a8db3f153063 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -157,7 +157,7 @@ static void __init pnv_check_guarded_cores(void) for_each_node_by_type(dn, "cpu") { if (of_property_match_string(dn, "status", "bad") >= 0) bad_count++; - }; + } if (bad_count) { printk(" _ _______________\n"); diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 233503fcf8f0..3ac70790ec7a 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -329,6 +329,20 @@ int dlpar_release_drc(u32 drc_index) return 0; } +int dlpar_unisolate_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, + DR_ENTITY_SENSE, drc_index); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + + return 0; +} + int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 12cbffd3c2e3..7e970f81d8ff 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -47,9 +47,6 @@ static void rtas_stop_self(void) BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); - printk("cpu %u (hwid %u) Ready to die...\n", - smp_processor_id(), hard_smp_processor_id()); - rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); panic("Alas, I survived.\n"); @@ -271,6 +268,19 @@ static int 
dlpar_offline_cpu(struct device_node *dn) if (!cpu_online(cpu)) break; + /* + * device_offline() will return -EBUSY (via cpu_down()) if there + * is only one CPU left. Check it here to fail earlier and with a + * more informative error message, while also retaining the + * cpu_add_remove_lock to be sure that no CPUs are being + * online/offlined during this check. + */ + if (num_online_cpus() == 1) { + pr_warn("Unable to remove last online CPU %pOFn\n", dn); + rc = -EBUSY; + goto out_unlock; + } + cpu_maps_update_done(); rc = device_offline(get_cpu_device(cpu)); if (rc) @@ -283,6 +293,7 @@ static int dlpar_offline_cpu(struct device_node *dn) thread); } } +out_unlock: cpu_maps_update_done(); out: @@ -802,8 +813,16 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_REMOVE: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) rc = dlpar_cpu_remove_by_count(count); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { rc = dlpar_cpu_remove_by_index(drc_index); + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the CPU removal failed. 
+ */ + if (rc) + dlpar_unisolate_drc(drc_index); + } else rc = -EINVAL; break; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 2c59b4986ea5..3a50612a78db 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,7 +26,7 @@ struct hcall_stats { }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) -DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); +static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); /* * Routines for displaying the statistics in debugfs diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 9fc5217f0c8e..0c55b991f665 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -638,7 +638,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) iommu_table_setparms(pci->phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, pci->phb->node, 0, 0); + if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -720,7 +721,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) iommu_table_setparms_lpar(ppci->phb, pdn, tbl, ppci->table_group, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - iommu_init_table(tbl, ppci->phb->node, 0, 0); + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -749,7 +751,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, phb->node, 0, 0); + if (!iommu_init_table(tbl, phb->node, 
0, 0)) + panic("Failed to initialize iommu table"); + set_iommu_table_base(&dev->dev, tbl); return; } @@ -1099,6 +1103,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) ret); } +/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ +static int iommu_get_page_shift(u32 query_page_size) +{ + /* Supported IO page-sizes according to LoPAR */ + const int shift[] = { + __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), + __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), + __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G) + }; + + int i = ARRAY_SIZE(shift) - 1; + + /* + * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: + * - bit 31 means 4k pages are supported, + * - bit 30 means 64k pages are supported, and so on. + * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. + */ + for (; i >= 0 ; i--) { + if (query_page_size & (1 << i)) + return shift[i]; + } + + /* No valid page size found. 
*/ + return 0; +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1206,13 +1237,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_failed; } } - if (query.page_size & 4) { - page_shift = 24; /* 16MB */ - } else if (query.page_size & 2) { - page_shift = 16; /* 64kB */ - } else if (query.page_size & 1) { - page_shift = 12; /* 4kB */ - } else { + + page_shift = iommu_get_page_shift(query.page_size); + if (!page_shift) { dev_dbg(&dev->dev, "no supported direct page size in mask %x", query.page_size); goto out_failed; @@ -1229,7 +1256,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (pmem_present) { if (query.largest_available_block >= (1ULL << (MAX_PHYSMEM_BITS - page_shift))) - len = MAX_PHYSMEM_BITS - page_shift; + len = MAX_PHYSMEM_BITS; else dev_info(&dev->dev, "Skipping ibm,pmemory"); } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3805519a6469..1f3152ad7213 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -977,11 +977,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - flags = newpp & 7; + flags = newpp & (HPTE_R_PP | HPTE_R_N); if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; + flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO); + lpar_rc = plpar_pte_protect(flags, slot, 0); BUG_ON(lpar_rc != H_SUCCESS); @@ -1630,7 +1632,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) } msleep(delay); rc = plpar_resize_hpt_prepare(0, shift); - }; + } switch (rc) { case H_SUCCESS: diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e278390ab28d..f71eac74ea92 100644 --- 
a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -537,6 +537,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_em_data(m); maxmem_data(m); + seq_printf(m, "security_flavor=%u\n", pseries_security_flavor); + return 0; } diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 835163f54244..ef26fe40efb0 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -93,6 +93,7 @@ struct papr_scm_priv { uint64_t block_size; int metadata_size; bool is_volatile; + bool hcall_flush_required; uint64_t bound_addr; @@ -117,6 +118,38 @@ struct papr_scm_priv { size_t stat_buffer_len; }; +static int papr_scm_pmem_flush(struct nd_region *nd_region, + struct bio *bio __maybe_unused) +{ + struct papr_scm_priv *p = nd_region_provider_data(nd_region); + unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0; + long rc; + + dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index); + + do { + rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token); + token = ret_buf[0]; + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); + + if (rc) { + dev_err(&p->pdev->dev, "flush error: %ld", rc); + rc = -EIO; + } else { + dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); + } + + return rc; +} + static LIST_HEAD(papr_nd_regions); static DEFINE_MUTEX(papr_ndr_lock); @@ -914,6 +947,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); + /* + * Check if the nvdimm is unarmed. No locking needed as we are still + * initializing. Ignore error encountered if any. 
+ */ + __drc_pmem_query_health(p); + + if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) + set_bit(NDD_UNARMED, &dimm_flags); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { @@ -943,6 +985,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.num_mappings = 1; ndr_desc.nd_set = &p->nd_set; + if (p->hcall_flush_required) { + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + ndr_desc.flush = papr_scm_pmem_flush; + } + if (p->is_volatile) p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); else { @@ -1088,6 +1135,7 @@ static int papr_scm_probe(struct platform_device *pdev) p->block_size = block_size; p->blocks = blocks; p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); + p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); /* We just need to ensure that set cookies are unique across */ uuid_parse(uuid_str, (uuid_t *) uuid); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index f9ae17e8a0f4..a8f9140a24fa 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic); int remove_phb_dynamic(struct pci_controller *phb) { struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); struct resource *res; int rc, i; @@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb) /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); - device_unregister(b->bridge); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index e1dc5d3254df..439ac72c2470 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ 
b/arch/powerpc/platforms/pseries/pmem.c @@ -139,7 +139,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) return rc; } -const struct of_device_id drc_pmem_match[] = { +static const struct of_device_id drc_pmem_match[] = { { .type = "ibm,persistent-memory", }, {} }; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4fe48c04c6c2..1f051a786fb3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void); /* Poweron flag used for enabling auto ups restart */ extern unsigned long rtas_poweron_auto; -/* Provided by HVC VIO */ -extern void hvc_vio_init_early(void); - /* Dynamic logical Partitioning/Mobility */ extern void dlpar_free_cc_nodes(struct device_node *); extern void dlpar_free_cc_property(struct property *); @@ -55,6 +52,7 @@ extern int dlpar_attach_node(struct device_node *, struct device_node *); extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 drc_index); extern int dlpar_release_drc(u32 drc_index); +extern int dlpar_unisolate_drc(u32 drc_index); void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); @@ -111,6 +109,7 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); +extern u32 pseries_security_flavor; void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f8b390a9d9fb..9d4ef65da7f3 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -699,7 +699,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.error_type = MCE_ERROR_TYPE_DCACHE; break; case MC_ERROR_TYPE_I_CACHE: - mce_err.error_type = MCE_ERROR_TYPE_DCACHE; + mce_err.error_type = MCE_ERROR_TYPE_ICACHE; 
break; case MC_ERROR_TYPE_UNKNOWN: default: diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 81343908ed33..f8f73b47b107 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id) return i; } -void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) { int i; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 46e1540abc22..754e493b7c05 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,7 @@ #include <asm/swiotlb.h> #include <asm/svm.h> #include <asm/dtl.h> +#include <asm/hvconsole.h> #include "pseries.h" #include "../../../../drivers/pci/pci.h" @@ -85,6 +86,7 @@ EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ int ibm_nmi_interlock_token; +u32 pseries_security_flavor; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -534,9 +536,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. + * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if + * H_CPU_BEHAV_FAVOUR_SECURITY is. 
*/ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 1; + else + pseries_security_flavor = 2; if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 9cb4fc839fd5..e00f3725ec96 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -22,6 +22,7 @@ #include <linux/mm.h> #include <linux/dma-map-ops.h> #include <linux/kobject.h> +#include <linux/kexec.h> #include <asm/iommu.h> #include <asm/dma.h> @@ -1278,6 +1279,20 @@ static int vio_bus_remove(struct device *dev) return 0; } +static void vio_bus_shutdown(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv; + + if (dev->driver) { + viodrv = to_vio_driver(dev->driver); + if (viodrv->shutdown) + viodrv->shutdown(viodev); + else if (kexec_in_progress) + vio_bus_remove(dev); + } +} + /** * vio_register_driver: - Register a new vio driver * @viodrv: The vio_driver structure to be registered. 
@@ -1285,6 +1300,10 @@ static int vio_bus_remove(struct device *dev) int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, const char *mod_name) { + // vio_bus_type is only initialised for pseries + if (!machine_is(pseries)) + return -ENODEV; + pr_debug("%s: driver %s registering\n", __func__, viodrv->name); /* fill in 'struct driver' fields */ @@ -1613,6 +1632,7 @@ struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, + .shutdown = vio_bus_shutdown, }; /** diff --git a/arch/powerpc/purgatory/trampoline_64.S b/arch/powerpc/purgatory/trampoline_64.S index d956b8a35fd1..b35837c13852 100644 --- a/arch/powerpc/purgatory/trampoline_64.S +++ b/arch/powerpc/purgatory/trampoline_64.S @@ -12,7 +12,6 @@ #include <asm/asm-compat.h> #include <asm/crashdump-ppc64.h> - .machine ppc64 .balign 256 .globl purgatory_start purgatory_start: diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 6b4a34b36d98..1d33b7a5ea83 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -344,7 +344,8 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; iommu_table_dart.it_ops = &iommu_dart_ops; - iommu_init_table(&iommu_table_dart, -1, 0, 0); + if (!iommu_init_table(&iommu_table_dart, -1, 0, 0)) + panic("Failed to initialize iommu table"); /* Reserve the last page of the DART to avoid possible prefetch * past the DART mapped area diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 040b9d01c079..69af73765783 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -455,7 +455,7 @@ static void setup_pci_atmu(struct pci_controller *hose) } } -static void __init setup_pci_cmd(struct pci_controller *hose) +static void setup_pci_cmd(struct pci_controller *hose) { u16 cmd; int cap_x; diff --git a/arch/powerpc/sysdev/xive/common.c 
b/arch/powerpc/sysdev/xive/common.c index 595310e056f4..50469700dec6 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -63,8 +63,19 @@ static const struct xive_ops *xive_ops; static struct irq_domain *xive_irq_domain; #ifdef CONFIG_SMP -/* The IPIs all use the same logical irq number */ -static u32 xive_ipi_irq; +/* The IPIs use the same logical irq number when on the same chip */ +static struct xive_ipi_desc { + unsigned int irq; + char name[16]; +} *xive_ipis; + +/* + * Use early_cpu_to_node() for hot-plugged CPUs + */ +static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu) +{ + return xive_ipis[early_cpu_to_node(cpu)].irq; +} #endif /* Xive state for each CPU */ @@ -253,17 +264,20 @@ notrace void xmon_xive_do_dump(int cpu) xmon_printf("\n"); } +static struct irq_data *xive_get_irq_data(u32 hw_irq) +{ + unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); + + return irq ? irq_get_irq_data(irq) : NULL; +} + int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { - struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; u32 target; u8 prio; u32 lirq; - if (!is_xive_irq(chip)) - return -EINVAL; - rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); @@ -273,6 +287,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); + if (!d) + d = xive_get_irq_data(hw_irq); + if (d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); u64 val = xive_esb_read(xd, XIVE_ESB_GET); @@ -289,6 +306,20 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) return 0; } +void xmon_xive_get_irq_all(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + unsigned int hwirq = (unsigned int)irqd_to_hwirq(d); + + if (d->domain == xive_irq_domain) + 
xmon_xive_get_irq_config(hwirq, d); + } +} + #endif /* CONFIG_XMON */ static unsigned int xive_get_irq(void) @@ -1067,28 +1098,94 @@ static struct irq_chip xive_ipi_chip = { .irq_unmask = xive_ipi_do_nothing, }; -static void __init xive_request_ipi(void) +/* + * IPIs are marked per-cpu. We use separate HW interrupts under the + * hood but associated with the same "linux" interrupt + */ +struct xive_ipi_alloc_info { + irq_hw_number_t hwirq; +}; + +static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - unsigned int virq; + struct xive_ipi_alloc_info *info = arg; + int i; - /* - * Initialization failed, move on, we might manage to - * reach the point where we display our errors before - * the system falls appart - */ - if (!xive_irq_domain) - return; + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip, + domain->host_data, handle_percpu_irq, + NULL, NULL); + } + return 0; +} - /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ); - xive_ipi_irq = virq; +static const struct irq_domain_ops xive_ipi_irq_domain_ops = { + .alloc = xive_ipi_irq_domain_alloc, +}; - WARN_ON(request_irq(virq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); +static int __init xive_request_ipi(void) +{ + struct fwnode_handle *fwnode; + struct irq_domain *ipi_domain; + unsigned int node; + int ret = -ENOMEM; + + fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI"); + if (!fwnode) + goto out; + + ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids, + &xive_ipi_irq_domain_ops, NULL); + if (!ipi_domain) + goto out_free_fwnode; + + xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL); + if (!xive_ipis) + goto out_free_domain; + + for_each_node(node) { + struct xive_ipi_desc *xid = &xive_ipis[node]; + struct xive_ipi_alloc_info info = { node }; + + /* Skip nodes without CPUs */ + if 
(cpumask_empty(cpumask_of_node(node))) + continue; + + /* + * Map one IPI interrupt per node for all cpus of that node. + * Since the HW interrupt number doesn't have any meaning, + * simply use the node number. + */ + xid->irq = irq_domain_alloc_irqs(ipi_domain, 1, node, &info); + if (xid->irq < 0) { + ret = xid->irq; + goto out_free_xive_ipis; + } + + snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + } + + return ret; + +out_free_xive_ipis: + kfree(xive_ipis); +out_free_domain: + irq_domain_remove(ipi_domain); +out_free_fwnode: + irq_domain_free_fwnode(fwnode); +out: + return ret; } static int xive_setup_cpu_ipi(unsigned int cpu) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); struct xive_cpu *xc; int rc; @@ -1131,6 +1228,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu) static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); + /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ @@ -1178,19 +1277,6 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, */ irq_clear_status_flags(virq, IRQ_LEVEL); -#ifdef CONFIG_SMP - /* IPIs are special and come up with HW number 0 */ - if (hw == XIVE_IPI_HW_IRQ) { - /* - * IPIs are marked per-cpu. 
We use separate HW interrupts under - * the hood but associated with the same "linux" interrupt - */ - irq_set_chip_and_handler(virq, &xive_ipi_chip, - handle_percpu_irq); - return 0; - } -#endif - rc = xive_irq_alloc_data(virq, hw); if (rc) return rc; @@ -1202,15 +1288,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) { - struct irq_data *data = irq_get_irq_data(virq); - unsigned int hw_irq; - - /* XXX Assign BAD number */ - if (!data) - return; - hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq != XIVE_IPI_HW_IRQ) - xive_irq_free_data(virq); + xive_irq_free_data(virq); } static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct, @@ -1335,17 +1413,14 @@ static int xive_prepare_cpu(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); if (!xc) { - struct device_node *np; - xc = kzalloc_node(sizeof(struct xive_cpu), GFP_KERNEL, cpu_to_node(cpu)); if (!xc) return -ENOMEM; - np = of_get_cpu_node(cpu, NULL); - if (np) - xc->chip_id = of_get_ibm_chip_id(np); - of_node_put(np); xc->hw_ipi = XIVE_BAD_IRQ; + xc->chip_id = XIVE_INVALID_CHIP_ID; + if (xive_ops->prepare_cpu) + xive_ops->prepare_cpu(cpu, xc); per_cpu(xive_cpu, cpu) = xc; } @@ -1408,13 +1483,12 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) struct irq_desc *desc = irq_to_desc(irq); struct irq_data *d = irq_desc_get_irq_data(desc); struct xive_irq_data *xd; - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); /* * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. 
*/ - if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ) + if (d->domain != xive_irq_domain) continue; /* @@ -1592,16 +1666,15 @@ static void xive_debug_show_cpu(struct seq_file *m, int cpu) seq_puts(m, "\n"); } -static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) +static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) { - struct irq_chip *chip = irq_data_get_irq_chip(d); + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); int rc; u32 target; u8 prio; u32 lirq; - - if (!is_xive_irq(chip)) - return; + struct xive_irq_data *xd; + u64 val; rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { @@ -1612,17 +1685,14 @@ static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); - if (d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 val = xive_esb_read(xd, XIVE_ESB_GET); - - seq_printf(m, "flags=%c%c%c PQ=%c%c", - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); - } + xd = irq_data_get_irq_handler_data(d); + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "flags=%c%c%c PQ=%c%c", + xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 
'Q' : '-'); seq_puts(m, "\n"); } @@ -1640,16 +1710,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private) for_each_irq_desc(i, desc) { struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hw_irq; - - if (!d) - continue; - - hw_irq = (unsigned int)irqd_to_hwirq(d); - /* IPIs are special (HW number 0) */ - if (hw_irq != XIVE_IPI_HW_IRQ) - xive_debug_show_irq(m, hw_irq, d); + if (d->domain == xive_irq_domain) + xive_debug_show_irq(m, d); } return 0; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 05a800a3104e..57e3f1540435 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -380,6 +380,11 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } +static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc) +{ + xc->chip_id = cpu_to_chip_id(cpu); +} + static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { s64 rc; @@ -462,6 +467,7 @@ static const struct xive_ops xive_native_ops = { .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, + .prepare_cpu = xive_native_prepare_cpu, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 01ccc0786ada..f143b6f111ac 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -549,7 +549,7 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, static bool xive_spapr_match(struct device_node *node) { /* Ignore cascaded controllers for the moment */ - return 1; + return true; } #ifdef CONFIG_SMP diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 9cf57c722faa..504e7edce358 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,8 
+5,6 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H -#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ - /* * A "disabled" interrupt should never fire, to catch problems * we set its logical number to this @@ -46,6 +44,7 @@ struct xive_ops { u32 *sw_irq); int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); + void (*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc); bool (*match)(struct device_node *np); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index bf7d69625a2e..c8173e92f19d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -54,6 +54,7 @@ #include <asm/code-patching.h> #include <asm/sections.h> #include <asm/inst.h> +#include <asm/interrupt.h> #ifdef CONFIG_PPC64 #include <asm/hvcall.h> @@ -605,7 +606,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) * debugger break (IPI). This is similar to * crash_kexec_secondary(). */ - if (TRAP(regs) != 0x100 || !wait_for_other_cpus(ncpus)) + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET || !wait_for_other_cpus(ncpus)) smp_send_debugger_break(); wait_for_other_cpus(ncpus); @@ -615,7 +616,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (!locked_down) { /* for breakpoint or single step, print curr insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? for help\n"); } @@ -684,7 +685,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) disable_surveillance(); if (!locked_down) { /* for breakpoint or single step, print current insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? 
for help\n"); } @@ -1769,9 +1770,12 @@ static void excprint(struct pt_regs *fp) printf(" sp: %lx\n", fp->gpr[1]); printf(" msr: %lx\n", fp->msr); - if (trap == 0x300 || trap == 0x380 || trap == 0x600 || trap == 0x200) { + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT || + trap == INTERRUPT_MACHINE_CHECK) { printf(" dar: %lx\n", fp->dar); - if (trap != 0x380) + if (trap != INTERRUPT_DATA_SEGMENT) printf(" dsisr: %lx\n", fp->dsisr); } @@ -1785,7 +1789,7 @@ static void excprint(struct pt_regs *fp) current->pid, current->comm); } - if (trap == 0x700) + if (trap == INTERRUPT_PROGRAM) print_bug_trap(fp); printf(linux_banner); @@ -1815,25 +1819,16 @@ static void prregs(struct pt_regs *fp) } #ifdef CONFIG_PPC64 - if (FULL_REGS(fp)) { - for (n = 0; n < 16; ++n) - printf("R%.2d = "REG" R%.2d = "REG"\n", - n, fp->gpr[n], n+16, fp->gpr[n+16]); - } else { - for (n = 0; n < 7; ++n) - printf("R%.2d = "REG" R%.2d = "REG"\n", - n, fp->gpr[n], n+7, fp->gpr[n+7]); - } +#define R_PER_LINE 2 #else +#define R_PER_LINE 4 +#endif + for (n = 0; n < 32; ++n) { - printf("R%.2d = %.8lx%s", n, fp->gpr[n], - (n & 3) == 3? "\n": " "); - if (n == 12 && !FULL_REGS(fp)) { - printf("\n"); - break; - } + printf("R%.2d = "REG"%s", n, fp->gpr[n], + (n % R_PER_LINE) == R_PER_LINE - 1 ? 
"\n" : " "); } -#endif + printf("pc = "); xmon_print_symbol(fp->nip, " ", "\n"); if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) { @@ -1846,7 +1841,9 @@ static void prregs(struct pt_regs *fp) printf("ctr = "REG" xer = "REG" trap = %4lx\n", fp->ctr, fp->xer, fp->trap); trap = TRAP(fp); - if (trap == 0x300 || trap == 0x380 || trap == 0x600) + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT) printf("dar = "REG" dsisr = %.8lx\n", fp->dar, fp->dsisr); } @@ -2727,30 +2724,6 @@ static void dump_all_xives(void) dump_one_xive(cpu); } -static void dump_one_xive_irq(u32 num, struct irq_data *d) -{ - xmon_xive_get_irq_config(num, d); -} - -static void dump_all_xive_irq(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hwirq; - - if (!d) - continue; - - hwirq = (unsigned int)irqd_to_hwirq(d); - /* IPIs are special (HW number 0) */ - if (hwirq) - dump_one_xive_irq(hwirq, d); - } -} - static void dump_xives(void) { unsigned long num; @@ -2767,9 +2740,9 @@ static void dump_xives(void) return; } else if (c == 'i') { if (scanhex(&num)) - dump_one_xive_irq(num, NULL); + xmon_xive_get_irq_config(num, NULL); else - dump_all_xive_irq(); + xmon_xive_get_irq_all(); return; } @@ -2980,7 +2953,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, if (!ppc_inst_prefixed(inst)) dump_func(ppc_inst_val(inst), adr); else - dump_func(ppc_inst_as_u64(inst), adr); + dump_func(ppc_inst_as_ulong(inst), adr); printf("\n"); } return adr - first_adr; @@ -4212,8 +4185,7 @@ static void dump_spu_fields(struct spu *spu) DUMP_FIELD(spu, "0x%p", pdata); } -int -spu_inst_dump(unsigned long adr, long count, int praddr) +static int spu_inst_dump(unsigned long adr, long count, int praddr) { return generic_inst_dump(adr, count, praddr, print_insn_spu); } diff --git a/arch/s390/include/asm/vdso/gettimeofday.h 
b/arch/s390/include/asm/vdso/gettimeofday.h index ed89ef742530..383c53c3dddd 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -68,7 +68,8 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index df01d7349d79..1936f21ed8cd 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -58,7 +58,8 @@ extern struct ms_hyperv_tsc_page hvclock_page #endif #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return __timens_vdso_data; } diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 3eec59f1fed3..543a05ed0c82 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -776,7 +776,7 @@ config I2C_MT7621 config I2C_MV64XXX tristate "Marvell mv64xxx I2C Controller" - depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST + depends on PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Marvell 64xxx line of host bridges. 
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 73e6ae88fafd..4bdd4c45e7a7 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -180,14 +180,13 @@ static struct proc_dir_entry *proc_pmu_options; static int option_server_mode; int pmu_battery_count; -int pmu_cur_battery; +static int pmu_cur_battery; unsigned int pmu_power_flags = PMU_PWR_AC_PRESENT; struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES]; static int query_batt_timer = BATTERY_POLLING_COUNT; static struct adb_request batt_req; static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES]; -int __fake_sleep; int asleep; #ifdef CONFIG_ADB @@ -1833,6 +1832,7 @@ pmu_present(void) */ static u32 save_via[8]; +static int __fake_sleep; static void save_via_state(void) diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 77612303841e..07f91ec1f960 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -56,7 +56,7 @@ static BLOCKING_NOTIFIER_HEAD(wf_client_list); static int wf_client_count; static unsigned int wf_overtemp; static unsigned int wf_overtemp_counter; -struct task_struct *wf_thread; +static struct task_struct *wf_thread; static struct platform_device wf_platform_device = { .name = "windfarm", diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c index ab467b9c31be..ba1ec6fc11d2 100644 --- a/drivers/macintosh/windfarm_pm121.c +++ b/drivers/macintosh/windfarm_pm121.c @@ -433,7 +433,7 @@ struct pm121_sys_state { struct wf_pid_state pid; }; -struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {}; +static struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {}; /* * ****** CPU Fans Control Loop ****** diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index 79cb1ad09bfd..75966052819a 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -94,7 +94,7 @@ 
static int smu_set_fan(int pwm, u8 id, u16 value) return rc; wait_for_completion(&comp); - /* Handle fallback (see coment above) */ + /* Handle fallback (see comment above) */ if (cmd.status != 0 && smu_supports_new_fans_ops) { printk(KERN_WARNING "windfarm: SMU failed new fan command " "falling back to old method\n"); diff --git a/include/linux/compat.h b/include/linux/compat.h index f0d2dd35d408..acac0b571df1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -467,6 +467,34 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ } \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + compat_sigset_word hi, lo; \ + sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_get_user(lo, &__c->sig[7], label); \ + unsafe_get_user(hi, &__c->sig[6], label); \ + __s->sig[3] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 3: \ + unsafe_get_user(lo, &__c->sig[5], label); \ + unsafe_get_user(hi, &__c->sig[4], label); \ + __s->sig[2] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 2: \ + unsafe_get_user(lo, &__c->sig[3], label); \ + unsafe_get_user(hi, &__c->sig[2], label); \ + __s->sig[1] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 1: \ + unsafe_get_user(lo, &__c->sig[1], label); \ + unsafe_get_user(hi, &__c->sig[0], label); \ + __s->sig[0] = hi | (((long)lo) << 32); \ + } \ +} while (0) #else #define unsafe_put_compat_sigset(compat, set, label) do { \ compat_sigset_t __user *__c = compat; \ @@ -474,6 +502,13 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, \ unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + sigset_t *__s = set; \ + \ + unsafe_copy_from_user(__s, __c, sizeof(*__c), label); \ +} while (0) #endif extern int 
compat_ptrace_request(struct task_struct *child, diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index c7c6e8b8344d..c05e903cef02 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -397,6 +397,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) +#define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 2919f1698140..ce2f69552003 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -46,16 +46,18 @@ static inline bool vdso_cycles_ok(u64 cycles) #endif #ifdef CONFIG_TIME_NS -static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { - const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct vdso_data *vd; const struct timens_offset *offs = &vdns->offset[clk]; const struct vdso_timestamp *vdso_ts; u64 cycles, last, ns; u32 seq; s64 sec; + vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? 
CS_RAW : CS_HRES_COARSE); + vd = __arch_get_timens_vdso_data(vd); if (clk != CLOCK_MONOTONIC_RAW) vd = &vd[CS_HRES_COARSE]; else @@ -92,13 +94,14 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, return 0; } #else -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return NULL; } -static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { return -EINVAL; } @@ -159,10 +162,10 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, } #ifdef CONFIG_TIME_NS -static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { - const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns); const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; const struct timens_offset *offs = &vdns->offset[clk]; u64 nsec; @@ -188,8 +191,8 @@ static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, return 0; } #else -static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { return -1; } @@ -310,7 +313,7 @@ __cvdso_gettimeofday_data(const struct vdso_data *vd, if (unlikely(tz != NULL)) { if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; tz->tz_dsttime = 
vd[CS_HRES_COARSE].tz_dsttime; @@ -333,7 +336,7 @@ __cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time) if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); @@ -363,7 +366,7 @@ int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); /* * Convert the clockid to a bitmask and use it to check which diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index c25cf7cd45e9..33ee34fc0828 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -10,16 +10,7 @@ * * We create two sets of source and destination buffers, one in regular memory, * the other cache-inhibited (by default we use /dev/fb0 for this, but an - * alterative path for cache-inhibited memory may be provided). - * - * One way to get cache-inhibited memory is to use the "mem" kernel parameter - * to limit the kernel to less memory than actually exists. Addresses above - * the limit may still be accessed but will be treated as cache-inhibited. For - * example, if there is actually 4GB of memory and the parameter "mem=3GB" is - * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. - * To access this region /dev/mem is used. The kernel should be configured - * without CONFIG_STRICT_DEVMEM. In this case use: - * ./alignment_handler /dev/mem 0xc0000000 + * alterative path for cache-inhibited memory may be provided, e.g. memtrace). 
* * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index defe488d6bf1..40253abc6208 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ pkey_siginfo stack_expansion_signal stack_expansion_ldst +TEST_PROGS := stress_code_patching.sh TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh new file mode 100755 index 000000000000..e454509659f6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +TIMEOUT=30 + +DEBUFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'` +if [ ! -e "$DEBUFS_DIR" ] +then + echo "debugfs not found, skipping" 1>&2 + exit 4 +fi + +if [ ! -e "$DEBUFS_DIR/tracing/current_tracer" ] +then + echo "Tracing files not found, skipping" 1>&2 + exit 4 +fi + + +echo "Testing for spurious faults when mapping kernel memory..." + +if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUFS_DIR/tracing/trace" +then + echo "FAILED: Ftrace already dead. 
Probably due to a spurious fault" 1>&2 + exit 1 +fi + +dmesg -C +START_TIME=`date +%s` +END_TIME=`expr $START_TIME + $TIMEOUT` +while [ `date +%s` -lt $END_TIME ] +do + echo function > $DEBUFS_DIR/tracing/current_tracer + echo nop > $DEBUFS_DIR/tracing/current_tracer + if dmesg | grep -q 'ftrace bug' + then + break + fi +done + +echo nop > $DEBUFS_DIR/tracing/current_tracer +if dmesg | grep -q 'ftrace bug' +then + echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2 + exit 1 +else + echo "OK: Mapping kernel memory does not cause spurious faults" + exit 0 +fi diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index 02dffb65de48..b099753b50e4 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle) fprintf(stderr, "error: cannot progress; "); fprintf(stderr, "too many faults\n"); exit(-1); - }; + } } fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 0e96150b7c7e..eb75e5360e31 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -14,3 +14,4 @@ perf-hwbreak core-pkey ptrace-pkey ptrace-syscall +ptrace-perf-hwbreak diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 8d3f006c98cc..a500639da97a 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -2,7 +2,7 @@ TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall + perf-hwbreak ptrace-syscall 
ptrace-perf-hwbreak top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index c1f324afdbf3..ecde2c199f3b 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -21,8 +21,13 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> +#include <signal.h> #include <string.h> #include <sys/ioctl.h> +#include <sys/wait.h> +#include <sys/ptrace.h> +#include <sys/sysinfo.h> +#include <asm/ptrace.h> #include <elf.h> #include <pthread.h> #include <sys/syscall.h> @@ -30,32 +35,130 @@ #include <linux/hw_breakpoint.h> #include "utils.h" +#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 +#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 +#endif + #define MAX_LOOPS 10000 #define DAWR_LENGTH_MAX ((0x3f + 1) * 8) -static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, - int cpu, int group_fd, - unsigned long flags) +int nprocs; + +static volatile int a = 10; +static volatile int b = 10; +static volatile char c[512 + 8] __attribute__((aligned(512))); + +static void perf_event_attr_set(struct perf_event_attr *attr, + __u32 type, __u64 addr, __u64 len, + bool exclude_user) { - attr->size = sizeof(*attr); - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = type; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; + attr->exclude_guest = 1; + attr->exclude_user = exclude_user; + attr->disabled = 1; } -static inline bool breakpoint_test(int len) +static int +perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, exclude_user); + return 
syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_process_event_open(__u32 type, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len) { struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static void close_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + close(fd[i]); +} + +static unsigned long read_fds(int *fd, int n) +{ + int i; + unsigned long c = 0; + unsigned long count = 0; + size_t res; + + for (i = 0; i < n; i++) { + res = read(fd[i], &c, sizeof(c)); + assert(res == sizeof(unsigned long long)); + count += c; + } + return count; +} + +static void reset_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_RESET); +} + +static void enable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_ENABLE); +} + +static void disable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_DISABLE); +} + +static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) +{ + int i = 0; + + /* Assume online processors are 0 to nprocs for simplisity */ + for (i = 0; i < nprocs; i++) { + fd[i] = perf_cpu_event_open(i, type, addr, len); + if (fd[i] < 0) { + close_fds(fd, i); + return fd[i]; + } + } + return 0; +} + +static inline bool breakpoint_test(int len) +{ int fd; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = HW_BREAKPOINT_R; /* bp_addr can point anywhere but needs to be aligned */ - attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800; - attr.bp_len = len; - fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + fd = 
perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len); if (fd < 0) return false; close(fd); @@ -75,7 +178,6 @@ static inline bool dawr_supported(void) static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) { int i,j; - struct perf_event_attr attr; size_t res; unsigned long long breaks, needed; int readint; @@ -85,6 +187,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) int break_fd; int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */ volatile int *k; + __u64 len; /* align to 0x400 boundary as required by DAWR */ readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) & @@ -94,19 +197,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) if (arraytest) ptr = &readintalign[0]; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = readwriteflag; - attr.bp_addr = (__u64)ptr; - attr.bp_len = sizeof(int); - if (arraytest) - attr.bp_len = DAWR_LENGTH_MAX; - attr.exclude_user = exclude_user; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + len = arraytest ? 
DAWR_LENGTH_MAX : sizeof(int); + break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr, + len, exclude_user); if (break_fd < 0) { - perror("sys_perf_event_open"); + perror("perf_process_event_open_exclude_user"); exit(1); } @@ -153,7 +248,6 @@ static int runtest_dar_outside(void) void *target; volatile __u16 temp16; volatile __u64 temp64; - struct perf_event_attr attr; int break_fd; unsigned long long breaks; int fail = 0; @@ -165,21 +259,11 @@ static int runtest_dar_outside(void) exit(EXIT_FAILURE); } - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; - attr.exclude_guest = 1; - attr.bp_type = HW_BREAKPOINT_RW; /* watch middle half of target array */ - attr.bp_addr = (__u64)(target + 2); - attr.bp_len = 4; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4); if (break_fd < 0) { free(target); - perror("sys_perf_event_open"); + perror("perf_process_event_open"); exit(EXIT_FAILURE); } @@ -263,11 +347,467 @@ static int runtest_dar_outside(void) return fail; } +static void multi_dawr_workload(void) +{ + a += 10; + b += 10; + c[512 + 1] += 'a'; +} + +static int test_process_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, 
PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + 
} + + fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); 
+ return 0; +} + +static int test_syswide_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = 
read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + 
close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int runtest_multi_dawr(void) +{ + int ret = 0; + + ret |= test_process_multi_diff_addr(); + ret |= test_process_multi_same_addr(); + ret |= test_process_multi_diff_addr_ro_wo(); + ret |= test_process_multi_same_addr_ro_wo(); + ret |= test_syswide_multi_diff_addr(); + ret |= test_syswide_multi_same_addr(); + ret |= test_syswide_multi_diff_addr_ro_wo(); + ret |= test_syswide_multi_same_addr_ro_wo(); + + return ret; +} + +static int runtest_unaligned_512bytes(void) +{ + unsigned long long breaks = 0; + int fd; + char *desc = "Process specific, 512 bytes, unaligned"; + __u64 addr = (__u64)&c + 8; + size_t res; + + fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512); + if (fd < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd, PERF_EVENT_IOC_RESET); + ioctl(fd, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd, PERF_EVENT_IOC_DISABLE); + + res = read(fd, &breaks, sizeof(breaks)); + assert(res == sizeof(unsigned long long)); + + close(fd); + + if (breaks != 2) { + printf("FAILED: %s: %lld != 2\n", desc, breaks); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +/* There is no perf api to find number of available watchpoints. Use ptrace. 
*/ +static int get_nr_wps(bool *arch_31) +{ + struct ppc_debug_info dbginfo; + int child_pid; + + child_pid = fork(); + if (!child_pid) { + int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("PTRACE_TRACEME failed\n"); + exit(EXIT_FAILURE); + } + kill(getpid(), SIGUSR1); + + sleep(1); + exit(EXIT_SUCCESS); + } + + wait(NULL); + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) { + perror("Can't get breakpoint info"); + exit(EXIT_FAILURE); + } + + *arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31); + return dbginfo.num_data_bps; +} + static int runtest(void) { int rwflag; int exclude_user; int ret; + bool dawr = dawr_supported(); + bool arch_31 = false; + int nr_wps = get_nr_wps(&arch_31); /* * perf defines rwflag as two bits read and write and at least @@ -280,7 +820,7 @@ static int runtest(void) return ret; /* if we have the dawr, we can do an array test */ - if (!dawr_supported()) + if (!dawr) continue; ret = runtestsingle(rwflag, exclude_user, 1); if (ret) @@ -289,6 +829,19 @@ static int runtest(void) } ret = runtest_dar_outside(); + if (ret) + return ret; + + if (dawr && nr_wps > 1) { + nprocs = get_nprocs(); + ret = runtest_multi_dawr(); + if (ret) + return ret; + } + + if (dawr && arch_31) + ret = runtest_unaligned_512bytes(); + return ret; } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 2e0d86e0687e..a0635a3819aa 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -194,6 +194,18 @@ static void test_workload(void) big_var[rand() % DAWR_MAX_LEN] = 'a'; else cvar = big_var[rand() % DAWR_MAX_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR 
Overlap, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + cvar = gstruct.a[rand() % A_LEN]; } static void check_success(pid_t child_pid, const char *name, const char *type, @@ -417,6 +429,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_multi_sethwdebug_range(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED"; + char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.b; + len1 = A_LEN; + len2 = B_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name1, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name2, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + +static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.a; + len1 = A_LEN; + len2 = A_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* 
PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + static void test_sethwdebug_range_unaligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -504,6 +579,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) test_sethwdebug_range_unaligned(child_pid); test_sethwdebug_range_unaligned_dar(child_pid); test_sethwdebug_dawr_max_range(child_pid); + if (dbginfo->num_data_bps > 1) { + test_multi_sethwdebug_range(child_pid); + test_multi_sethwdebug_range_dawr_overlap(child_pid); + } } } } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c new file mode 100644 index 000000000000..3344e74a97b4 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -0,0 +1,659 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <asm/unistd.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include "ptrace.h" + +char data[16]; + +/* Overlapping address range */ +volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; +volatile __u64 *perf_data1 = (__u64 *)&data[4]; + +/* Non-overlapping address range */ +volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; +volatile __u64 *perf_data2 = (__u64 *)&data[8]; + +static unsigned long pid_max_addr(void) +{ + FILE *fp; + char *line, *c; + char addr[100]; + size_t len = 0; + + fp = 
fopen("/proc/kallsyms", "r"); + if (!fp) { + printf("Failed to read /proc/kallsyms. Exiting..\n"); + exit(EXIT_FAILURE); + } + + while (getline(&line, &len, fp) != -1) { + if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || + strstr(line, "pid_max_min")) + continue; + + strncpy(addr, line, len < 100 ? len : 100); + c = strchr(addr, ' '); + *c = '\0'; + return strtoul(addr, &c, 16); + } + fclose(fp); + printf("Could not find pix_max. Exiting..\n"); + exit(EXIT_FAILURE); + return -1; +} + +static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; +} + +static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = pid_max_addr(); + attr->bp_len = sizeof(unsigned long); + attr->exclude_user = 1; + attr->exclude_hv = 1; +} + +static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); +} + +static int perf_thread_kernel_event_open(pid_t child_pid) +{ + 
struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_cpu_kernel_event_open(int cpu) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int child(void) +{ + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + printf("Error: PTRACE_TRACEME failed\n"); + return 0; + } + kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ + + return 0; +} + +static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, + __u64 addr, int len) +{ + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = addr; + info->addr2 = addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; +} + +static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +{ + struct ppc_hw_breakpoint info; + + ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); +} + +static int test1(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range overlaps) + * fail; + */ + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test2(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range does not overlaps) + * allow; + */ + + perf_fd = perf_cpu_event_open(0, 
(__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test3(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test4(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range does not overlap) + * allow; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test5(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the different thread) + * allow; + */ + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + + ptrace_fd = ptrace_open(child_pid, 
(__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); +perf_close: + close(perf_fd); +kill_child: + kill(cpid, SIGINT); + return ret; +} + +static int test6(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread kernel event by perf) + * if (existing thread event by ptrace on the same thread) + * allow; + * -- OR -- + * if (new per cpu kernel event by perf) + * if (existing thread event by ptrace) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_kernel_event_open(child_pid); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + + perf_fd = perf_cpu_kernel_event_open(0); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test7(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test8(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlaps) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = 
perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test9(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test10(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test11(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto 
ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test12(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test13(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test14(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: 
+ kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) +{ + int ret; + + ret = fun(arg); + if (ret) + printf("%s: Error\n", msg); + else + printf("%s: Ok\n", msg); + return ret; +} + +char *desc[14] = { + "perf cpu event -> ptrace thread event (Overlapping)", + "perf cpu event -> ptrace thread event (Non-overlapping)", + "perf thread event -> ptrace same thread event (Overlapping)", + "perf thread event -> ptrace same thread event (Non-overlapping)", + "perf thread event -> ptrace other thread event", + "ptrace thread event -> perf kernel event", + "ptrace thread event -> perf same thread event (Overlapping)", + "ptrace thread event -> perf same thread event (Non-overlapping)", + "ptrace thread event -> perf other thread event", + "ptrace thread event -> perf cpu event (Overlapping)", + "ptrace thread event -> perf cpu event (Non-overlapping)", + "ptrace thread event -> perf same thread & cpu event (Overlapping)", + "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", + "ptrace thread event -> perf other thread & cpu event", +}; + +static int test(pid_t child_pid) +{ + int ret = TEST_PASS; + + ret |= do_test(desc[0], test1, child_pid); + ret |= do_test(desc[1], test2, child_pid); + ret |= do_test(desc[2], test3, child_pid); + ret |= do_test(desc[3], test4, child_pid); + ret |= do_test(desc[4], test5, child_pid); + ret |= do_test(desc[5], test6, child_pid); + ret |= do_test(desc[6], test7, child_pid); + ret |= do_test(desc[7], test8, child_pid); + ret |= do_test(desc[8], test9, child_pid); + ret |= do_test(desc[9], test10, child_pid); + ret |= do_test(desc[10], test11, child_pid); + ret |= do_test(desc[11], test12, child_pid); + ret |= do_test(desc[12], test13, child_pid); + ret |= do_test(desc[13], test14, child_pid); + + return ret; +} + +static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +{ + if 
(ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { + perror("Can't get breakpoint info"); + exit(-1); + } +} + +static int ptrace_perf_hwbreak(void) +{ + int ret; + pid_t child_pid; + struct ppc_debug_info dbginfo; + + child_pid = fork(); + if (!child_pid) + return child(); + + /* parent */ + wait(NULL); /* <-- child (SIGUSR1) */ + + get_dbginfo(child_pid, &dbginfo); + SKIP_IF(dbginfo.num_data_bps <= 1); + + ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + SKIP_IF(ret < 0); + close(ret); + + ret = test(child_pid); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + return ret; +} + +int main(int argc, char *argv[]) +{ + return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); +} diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index f25e854fe370..844d18cd5f93 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2 top_srcdir = ../../../../.. 
CFLAGS += -I../../../../../usr/include @@ -13,3 +13,4 @@ $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S $(OUTPUT)/rfi_flush: flush_utils.c $(OUTPUT)/entry_flush: flush_utils.c +$(OUTPUT)/uaccess_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 78cf914fa321..68ce377b205e 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -53,7 +53,7 @@ int entry_flush_test(void) entry_flush = entry_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c index 0c3c4c40c7fb..4d95965cb751 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.c +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -13,6 +13,7 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include <sys/utsname.h> #include "utils.h" #include "flush_utils.h" @@ -35,6 +36,18 @@ void syscall_loop(char *p, unsigned long iterations, } } +void syscall_loop_uaccess(char *p, unsigned long iterations, + unsigned long zero_size) +{ + struct utsname utsname; + + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + uname(&utsname); + } +} + static void sigill_handler(int signr, siginfo_t *info, void *unused) { static int warned; diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 07a5eb301466..e1e68281f7ac 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ 
b/tools/testing/selftests/powerpc/security/flush_utils.h
@@ -9,9 +9,16 @@
 
 #define CACHELINE_SIZE 128
 
+#define PERF_L1D_READ_MISS_CONFIG	((PERF_COUNT_HW_CACHE_L1D) | \
+					(PERF_COUNT_HW_CACHE_OP_READ << 8) | \
+					(PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
+
 void syscall_loop(char *p, unsigned long iterations,
 		  unsigned long zero_size);
 
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+			  unsigned long zero_size);
+
 void set_dscr(unsigned long val);
 
 #endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 7565fd786640..f73484a6470f 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -54,7 +54,7 @@ int rfi_flush_test(void)
 
 	rfi_flush = rfi_flush_orig;
 
-	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
 	FAIL_IF(fd < 0);
 
 	p = (char *)memalign(zero_size, CACHELINE_SIZE);
diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c
new file mode 100644
index 000000000000..cf80f960e38a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ * Copyright 2020 Canonical Ltd.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+/*
+ * Check that the powerpc/uaccess_flush debugfs switch changes the number of
+ * L1D read misses counted around syscall_loop_uaccess().
+ *
+ * rfi_flush and entry_flush are written to 0 first if they were non-zero.
+ * The measurement then runs `repetitions` times with uaccess_flush at its
+ * original value and `repetitions` times with the value inverted.  With the
+ * flush on, every run must count at least l1d_misses_expected misses; with
+ * it off, fewer than half of that.
+ *
+ * Returns rc (0 if both phases passed, 1 otherwise) after writing the three
+ * debugfs values back; a failed write while flipping or restoring returns 1.
+ */
+int uaccess_flush_test(void)
+{
+	char *p;
+	int repetitions = 10;
+	int fd, passes = 0, iter, rc = 0;
+	struct perf_event_read v;
+	__u64 l1d_misses_total = 0;
+	unsigned long iterations = 100000, zero_size = 24 * 1024;
+	unsigned long l1d_misses_expected;
+	int rfi_flush_orig;
+	int entry_flush_orig;
+	int uaccess_flush, uaccess_flush_orig;
+
+	SKIP_IF(geteuid() != 0);
+
+	// The PMU event we use only works on Power7 or later
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+		perror("Unable to read powerpc/rfi_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+		perror("Unable to read powerpc/entry_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) {
+		perror("Unable to read powerpc/uaccess_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (rfi_flush_orig != 0) {
+		if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
+			perror("error writing to powerpc/rfi_flush debugfs file");
+			FAIL_IF(1);
+		}
+	}
+
+	if (entry_flush_orig != 0) {
+		if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
+			perror("error writing to powerpc/entry_flush debugfs file");
+			FAIL_IF(1);
+		}
+	}
+
+	uaccess_flush = uaccess_flush_orig;
+
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
+	FAIL_IF(fd < 0);
+
+	// memalign() takes (alignment, size): ask for zero_size bytes aligned to
+	// a cacheline, since the whole range is handed to syscall_loop_uaccess()
+	p = (char *)memalign(CACHELINE_SIZE, zero_size);
+	FAIL_IF(!p);
+
+	FAIL_IF(perf_event_enable(fd));
+
+	// disable L1 prefetching
+	set_dscr(1);
+
+	iter = repetitions;
+
+	/*
+	 * We expect to see l1d miss for each cacheline access when uaccess_flush
+	 * is set. Allow a small variation on this.
+	 */
+	l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+	FAIL_IF(perf_event_reset(fd));
+
+	syscall_loop_uaccess(p, iterations, zero_size);
+
+	FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+	if (uaccess_flush && v.l1d_misses >= l1d_misses_expected)
+		passes++;
+	else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2))
+		passes++;
+
+	l1d_misses_total += v.l1d_misses;
+
+	// Run the remaining repetitions for the current uaccess_flush setting
+	if (--iter)
+		goto again;
+
+	if (passes < repetitions) {
+		printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+		       uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>',
+		       uaccess_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       repetitions - passes, repetitions);
+		rc = 1;
+	} else {
+		printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+		       uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<',
+		       uaccess_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       passes, repetitions);
+	}
+
+	// First phase done: invert uaccess_flush and measure once more
+	if (uaccess_flush == uaccess_flush_orig) {
+		uaccess_flush = !uaccess_flush_orig;
+		if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) {
+			perror("error writing to powerpc/uaccess_flush debugfs file");
+			return 1;
+		}
+		iter = repetitions;
+		l1d_misses_total = 0;
+		passes = 0;
+		goto again;
+	}
+
+	perf_event_disable(fd);
+	close(fd);
+	free(p);
+
+	set_dscr(0);
+
+	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+		return 1;
+	}
+
+	if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/entry_flush debugfs file");
+		return 1;
+	}
+
+	if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/uaccess_flush debugfs file");
+		return 1;
+	}
+
+	return rc;
+}
+
+/* Hand uaccess_flush_test to test_harness() and return its result. */
+int main(int argc, char *argv[])
+{
+	return test_harness(uaccess_flush_test, "uaccess_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index c75960af8018..11521077f915 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
 	/* Get thread endianness: extract bit LE from MSR */
 	thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
 
-	/***
+	/*
 	 * Little-Endian Machine
 	 */
 
@@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
 		}
 	}
 
-	/***
+	/*
 	 * Big-Endian Machine
 	 */
 
diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c
index 7bf841a3967b..6b13dc277724 100644
--- a/tools/testing/selftests/timens/gettime_perf.c
+++ b/tools/testing/selftests/timens/gettime_perf.c
@@ -25,6 +25,12 @@ static void fill_function_pointers(void)
 	if (!vdso)
 		vdso = dlopen("linux-gate.so.1",
 			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-vdso32.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-vdso64.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
 	if (!vdso) {
 		pr_err("[WARN]\tfailed to find vDSO\n");
 		return;
@@ -32,6 +38,8 @@ static void fill_function_pointers(void)
 
 	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
 	if (!vdso_clock_gettime)
+		vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime");
+	if (!vdso_clock_gettime)
 		pr_err("Warning: failed to find clock_gettime in vDSO\n");
 
 }
|