From cb8a2ef0848ca80d67d6d56e2df757cfdf6b3355 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 11 Mar 2024 22:23:47 +0800 Subject: LoongArch: Add ORC stack unwinder support The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is similar in concept to a DWARF unwinder. The difference is that the format of the ORC data is much simpler than DWARF, which in turn allows the ORC unwinder to be much simpler and faster. The ORC data consists of unwind tables which are generated by objtool. After analyzing all the code paths of a .o file, it determines information about the stack state at each instruction address in the file and outputs that information to the .orc_unwind and .orc_unwind_ip sections. The per-object ORC sections are combined at link time and are sorted and post-processed at boot time. The unwinder uses the resulting data to correlate instruction addresses with their stack states at run time. Most of the logic are similar with x86, in order to get ra info before ra is saved into stack, add ra_reg and ra_offset into orc_entry. At the same time, modify some arch-specific code to silence the objtool warnings. Co-developed-by: Jinyang He Signed-off-by: Jinyang He Co-developed-by: Youling Tang Signed-off-by: Youling Tang Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 + arch/loongarch/Kconfig.debug | 11 + arch/loongarch/Makefile | 23 +- arch/loongarch/include/asm/Kbuild | 2 + arch/loongarch/include/asm/bug.h | 1 + arch/loongarch/include/asm/exception.h | 2 + arch/loongarch/include/asm/module.h | 7 + arch/loongarch/include/asm/orc_header.h | 18 + arch/loongarch/include/asm/orc_lookup.h | 31 ++ arch/loongarch/include/asm/orc_types.h | 58 ++++ arch/loongarch/include/asm/stackframe.h | 3 + arch/loongarch/include/asm/unwind.h | 20 +- arch/loongarch/include/asm/unwind_hints.h | 28 ++ arch/loongarch/kernel/Makefile | 4 + arch/loongarch/kernel/entry.S | 5 + arch/loongarch/kernel/fpu.S | 7 + arch/loongarch/kernel/genex.S | 6 + arch/loongarch/kernel/lbt.S | 3 + arch/loongarch/kernel/mcount_dyn.S | 6 + arch/loongarch/kernel/module.c | 22 +- arch/loongarch/kernel/relocate_kernel.S | 7 +- arch/loongarch/kernel/rethook_trampoline.S | 1 + arch/loongarch/kernel/setup.c | 2 + arch/loongarch/kernel/stacktrace.c | 1 + arch/loongarch/kernel/traps.c | 42 ++- arch/loongarch/kernel/unwind_orc.c | 528 +++++++++++++++++++++++++++++ arch/loongarch/kernel/vmlinux.lds.S | 3 + arch/loongarch/kvm/switch.S | 9 +- arch/loongarch/lib/clear_user.S | 3 + arch/loongarch/lib/copy_user.S | 3 + arch/loongarch/lib/memcpy.S | 3 + arch/loongarch/lib/memset.S | 3 + arch/loongarch/mm/tlb.c | 27 +- arch/loongarch/mm/tlbex.S | 9 + arch/loongarch/vdso/Makefile | 1 + 35 files changed, 861 insertions(+), 40 deletions(-) create mode 100644 arch/loongarch/include/asm/orc_header.h create mode 100644 arch/loongarch/include/asm/orc_lookup.h create mode 100644 arch/loongarch/include/asm/orc_types.h create mode 100644 arch/loongarch/include/asm/unwind_hints.h create mode 100644 arch/loongarch/kernel/unwind_orc.c (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 929f68926b34..8d6725115ac6 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -136,6 +136,7 @@ config LOONGARCH select HAVE_KVM select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI + select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -148,6 +149,7 @@ config LOONGARCH select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SETUP_PER_CPU_AREA if NUMA + select HAVE_STACK_VALIDATION if HAVE_OBJTOOL select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 8d36aab53008..98d60630c3d4 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -26,4 +26,15 @@ config UNWINDER_PROLOGUE Some of the addresses it reports may be incorrect (but better than the Guess unwinder). +config UNWINDER_ORC + bool "ORC unwinder" + select OBJTOOL + help + This option enables the ORC (Oops Rewind Capability) unwinder for + unwinding kernel stack traces. It uses a custom data format which is + a simplified version of the DWARF Call Frame Information standard. + + Enabling this option will increase the kernel's runtime memory usage + by roughly 2-4MB, depending on your kernel config. + endchoice diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 983aa2b1629a..e3bc02fb7fdc 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -26,6 +26,18 @@ endif 32bit-emul = elf32loongarch 64bit-emul = elf64loongarch +ifdef CONFIG_UNWINDER_ORC +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h +orc_hash_sh := $(srctree)/scripts/orc_hash.sh +targets += $(orc_hash_h) +quiet_cmd_orc_hash = GEN $@ + cmd_orc_hash = mkdir -p $(dir $@); \ + $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@ +$(orc_hash_h): $(srctree)/arch/loongarch/include/asm/orc_types.h $(orc_hash_sh) FORCE + $(call if_changed,orc_hash) +archprepare: $(orc_hash_h) +endif + ifdef CONFIG_DYNAMIC_FTRACE KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY CC_FLAGS_FTRACE := -fpatchable-function-entry=2 @@ -72,8 +84,6 @@ KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) -KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) -KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) else cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel @@ -82,6 +92,15 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs endif +KBUILD_AFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) +KBUILD_CFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) +KBUILD_AFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub) +KBUILD_CFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub) + +ifdef CONFIG_OBJTOOL +KBUILD_CFLAGS += -fno-jump-tables +endif + KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic ifeq ($(CONFIG_RELOCATABLE),y) diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 93783fa24f6e..a97c0edbb866 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h + generic-y += dma-contiguous.h generic-y += mcs_spinlock.h generic-y += parport.h diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h index d4ca3ba25418..08388876ade4 100644 --- a/arch/loongarch/include/asm/bug.h +++ b/arch/loongarch/include/asm/bug.h @@ -44,6 +44,7 @@ do { \ instrumentation_begin(); \ __BUG_FLAGS(BUGFLAG_WARNING|(flags)); \ + annotate_reachable(); \ instrumentation_end(); \ } while (0) diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h index af74a3fdcad1..c6d20736fd92 100644 --- a/arch/loongarch/include/asm/exception.h +++ b/arch/loongarch/include/asm/exception.h @@ -6,6 +6,8 @@ #include #include +extern void *exception_table[]; + void show_registers(struct pt_regs *regs); asmlinkage void cache_parity_error(void); diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 2ecd82bb64e1..f33f3fd32ecc 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -6,6 +6,7 @@ #define _ASM_MODULE_H #include +#include #include #define RELA_STACK_DEPTH 16 @@ -21,6 +22,12 @@ struct mod_arch_specific { struct mod_section plt; struct mod_section plt_idx; +#ifdef CONFIG_UNWINDER_ORC + unsigned int num_orcs; + int *orc_unwind_ip; + struct orc_entry *orc_unwind; +#endif + /* For CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; }; diff --git a/arch/loongarch/include/asm/orc_header.h b/arch/loongarch/include/asm/orc_header.h new file mode 100644 index 000000000000..f9d509c3fd70 --- /dev/null +++ b/arch/loongarch/include/asm/orc_header.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include +#include +#include + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. + */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h new file mode 100644 index 000000000000..b02e6357def4 --- /dev/null +++ b/arch/loongarch/include/asm/orc_lookup.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ORC_LOOKUP_H +#define _ORC_LOOKUP_H + +/* + * This is a lookup table for speeding up access to the .orc_unwind table. + * Given an input address offset, the corresponding lookup table entry + * specifies a subset of the .orc_unwind table to search. + * + * Each block represents the end of the previous range and the start of the + * next range. An extra block is added to give the last range an end. + * + * The block size should be a power of 2 to avoid a costly 'div' instruction. + * + * A block size of 256 was chosen because it roughly doubles unwinder + * performance while only adding ~5% to the ORC data footprint. + */ +#define LOOKUP_BLOCK_ORDER 8 +#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER) + +#ifndef LINKER_SCRIPT + +extern unsigned int orc_lookup[]; +extern unsigned int orc_lookup_end[]; + +#define LOOKUP_START_IP (unsigned long)_stext +#define LOOKUP_STOP_IP (unsigned long)_etext + +#endif /* LINKER_SCRIPT */ + +#endif /* _ORC_LOOKUP_H */ diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h new file mode 100644 index 000000000000..caf1f71a1057 --- /dev/null +++ b/arch/loongarch/include/asm/orc_types.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ORC_TYPES_H +#define _ORC_TYPES_H + +#include + +/* + * The ORC_REG_* registers are base registers which are used to find other + * registers on the stack. + * + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the + * address of the previous frame: the caller's SP before it called the current + * function. + * + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in + * the current frame. + * + * The most commonly used base registers are SP and FP -- which the previous SP + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is + * usually based on. + * + * The rest of the base registers are needed for special cases like entry code + * and GCC realigned stacks. + */ +#define ORC_REG_UNDEFINED 0 +#define ORC_REG_PREV_SP 1 +#define ORC_REG_SP 2 +#define ORC_REG_FP 3 +#define ORC_REG_MAX 4 + +#define ORC_TYPE_UNDEFINED 0 +#define ORC_TYPE_END_OF_STACK 1 +#define ORC_TYPE_CALL 2 +#define ORC_TYPE_REGS 3 +#define ORC_TYPE_REGS_PARTIAL 4 + +#ifndef __ASSEMBLY__ +/* + * This struct is more or less a vastly simplified version of the DWARF Call + * Frame Information standard. It contains only the necessary parts of DWARF + * CFI, simplified for ease of access by the in-kernel unwinder. It tells the + * unwinder how to find the previous SP and FP (and sometimes entry regs) on + * the stack for a given code address. Each instance of the struct corresponds + * to one or more code locations. + */ +struct orc_entry { + s16 sp_offset; + s16 fp_offset; + s16 ra_offset; + unsigned int sp_reg:4; + unsigned int fp_reg:4; + unsigned int ra_reg:4; + unsigned int type:3; + unsigned int signal:1; +}; +#endif /* __ASSEMBLY__ */ + +#endif /* _ORC_TYPES_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 4fb1e6408b98..45b507a7b06f 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -13,6 +13,7 @@ #include #include #include +#include /* Make the addition of cfi info a little easier. */ .macro cfi_rel_offset reg offset=0 docfi=0 @@ -162,6 +163,7 @@ li.w t0, CSR_CRMD_WE csrxchg t0, t0, LOONGARCH_CSR_CRMD #endif + UNWIND_HINT_REGS .endm .macro SAVE_ALL docfi=0 @@ -219,6 +221,7 @@ .macro RESTORE_SP_AND_RET docfi=0 cfi_ld sp, PT_R3, \docfi + UNWIND_HINT_FUNC ertn .endm diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index b9dce87afd2e..40a6763c5aec 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -16,6 +16,7 @@ enum unwinder_type { UNWINDER_GUESS, UNWINDER_PROLOGUE, + UNWINDER_ORC, }; struct unwind_state { @@ -24,7 +25,7 @@ struct unwind_state { struct task_struct *task; bool first, error, reset; int graph_idx; - unsigned long sp, pc, ra; + unsigned long sp, fp, pc, ra; }; bool default_next_frame(struct unwind_state *state); @@ -61,14 +62,17 @@ static __always_inline void __unwind_start(struct unwind_state *state, state->sp = regs->regs[3]; state->pc = regs->csr_era; state->ra = regs->regs[1]; + state->fp = regs->regs[22]; } else if (task && task != current) { state->sp = thread_saved_fp(task); state->pc = thread_saved_ra(task); state->ra = 0; + state->fp = 0; } else { state->sp = (unsigned long)__builtin_frame_address(0); state->pc = (unsigned long)__builtin_return_address(0); state->ra = 0; + state->fp = 0; } state->task = task; get_stack_info(state->sp, state->task, &state->stack_info); @@ -77,6 +81,18 @@ static __always_inline void __unwind_start(struct unwind_state *state, static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state) { - return unwind_done(state) ? 0 : state->pc; + if (unwind_done(state)) + return 0; + + return __kernel_text_address(state->pc) ? state->pc : 0; } + +#ifdef CONFIG_UNWINDER_ORC +void unwind_init(void); +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); +#else +static inline void unwind_init(void) {} +static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} +#endif + #endif /* _ASM_UNWIND_H */ diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h new file mode 100644 index 000000000000..a01086ad9dde --- /dev/null +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H +#define _ASM_LOONGARCH_UNWIND_HINTS_H + +#include +#include + +#ifdef __ASSEMBLY__ + +.macro UNWIND_HINT_UNDEFINED + UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED +.endm + +.macro UNWIND_HINT_END_OF_STACK + UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK +.endm + +.macro UNWIND_HINT_REGS + UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS +.endm + +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 3c808c680370..3a7620b66bc6 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -3,6 +3,8 @@ # Makefile for the Linux/LoongArch kernel. # +OBJECT_FILES_NON_STANDARD_head.o := y + extra-y := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ @@ -21,6 +23,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o CFLAGS_module.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) +CFLAGS_traps.o += $(call cc-option,-Wno-override-init,) CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) ifdef CONFIG_FUNCTION_TRACER @@ -62,6 +65,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 1ec8e4c4cc2b..48e7e34e355e 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -14,11 +14,13 @@ #include #include #include +#include .text .cfi_sections .debug_frame .align 5 SYM_CODE_START(handle_syscall) + UNWIND_HINT_UNDEFINED csrrd t0, PERCPU_BASE_KS la.pcrel t1, kernelsp add.d t1, t1, t0 @@ -57,6 +59,7 @@ SYM_CODE_START(handle_syscall) cfi_st fp, PT_R22 SAVE_STATIC + UNWIND_HINT_REGS #ifdef CONFIG_KGDB li.w t1, CSR_CRMD_WE @@ -75,6 +78,7 @@ SYM_CODE_END(handle_syscall) _ASM_NOKPROBE(handle_syscall) SYM_CODE_START(ret_from_fork) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, sp bl syscall_exit_to_user_mode @@ -84,6 +88,7 @@ SYM_CODE_START(ret_from_fork) SYM_CODE_END(ret_from_fork) SYM_CODE_START(ret_from_kernel_thread) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, s1 jirl ra, s0, 0 diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 4382e36ae3d4..69a85f2479fb 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -15,6 +15,7 @@ #include #include #include +#include #define FPU_REG_WIDTH 8 #define LSX_REG_WIDTH 16 @@ -526,3 +527,9 @@ SYM_FUNC_END(_restore_lasx_context) .L_fpu_fault: li.w a0, -EFAULT # failure jr ra + +#ifdef CONFIG_CPU_HAS_LBT +STACK_FRAME_NON_STANDARD _restore_fp +STACK_FRAME_NON_STANDARD _restore_lsx +STACK_FRAME_NON_STANDARD _restore_lasx +#endif diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 2bb3aa2dcfcb..86d5d90ebefe 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -32,6 +32,7 @@ SYM_FUNC_START(__arch_cpu_idle) SYM_FUNC_END(__arch_cpu_idle) SYM_CODE_START(handle_vint) + UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL la_abs t1, __arch_cpu_idle @@ -49,6 +50,7 @@ SYM_CODE_START(handle_vint) SYM_CODE_END(handle_vint) SYM_CODE_START(except_vec_cex) + UNWIND_HINT_UNDEFINED b cache_parity_error SYM_CODE_END(except_vec_cex) @@ -67,6 +69,7 @@ SYM_CODE_END(except_vec_cex) .macro BUILD_HANDLER exception handler prep .align 5 SYM_CODE_START(handle_\exception) + UNWIND_HINT_UNDEFINED 666: BACKUP_T0T1 SAVE_ALL @@ -77,7 +80,9 @@ SYM_CODE_END(except_vec_cex) 668: RESTORE_ALL_AND_RET SYM_CODE_END(handle_\exception) + .pushsection ".data", "aw", %progbits SYM_DATA(unwind_hint_\exception, .word 668b - 666b) + .popsection .endm BUILD_HANDLER ade ade badv @@ -94,6 +99,7 @@ SYM_CODE_END(except_vec_cex) BUILD_HANDLER reserved reserved none /* others */ SYM_CODE_START(handle_sys) + UNWIND_HINT_UNDEFINED la_abs t0, handle_syscall jr t0 SYM_CODE_END(handle_sys) diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S index 9c75120a26d8..001f061d226a 100644 --- a/arch/loongarch/kernel/lbt.S +++ b/arch/loongarch/kernel/lbt.S @@ -11,6 +11,7 @@ #include #include #include +#include #define SCR_REG_WIDTH 8 @@ -153,3 +154,5 @@ SYM_FUNC_END(_restore_ftop_context) .L_lbt_fault: li.w a0, -EFAULT # failure jr ra + +STACK_FRAME_NON_STANDARD _restore_ftop_context diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index 482aa553aa2d..0c65cf09110c 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -73,6 +73,7 @@ SYM_FUNC_START(ftrace_stub) SYM_FUNC_END(ftrace_stub) SYM_CODE_START(ftrace_common) + UNWIND_HINT_UNDEFINED PTR_ADDI a0, ra, -8 /* arg0: ip */ move a1, t0 /* arg1: parent_ip */ la.pcrel t1, function_trace_op @@ -113,12 +114,14 @@ ftrace_common_return: SYM_CODE_END(ftrace_common) SYM_CODE_START(ftrace_caller) + UNWIND_HINT_UNDEFINED ftrace_regs_entry allregs=0 b ftrace_common SYM_CODE_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS SYM_CODE_START(ftrace_regs_caller) + UNWIND_HINT_UNDEFINED ftrace_regs_entry allregs=1 b ftrace_common SYM_CODE_END(ftrace_regs_caller) @@ -126,6 +129,7 @@ SYM_CODE_END(ftrace_regs_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_CODE_START(ftrace_graph_caller) + UNWIND_HINT_UNDEFINED PTR_L a0, sp, PT_ERA PTR_ADDI a0, a0, -8 /* arg0: self_addr */ PTR_ADDI a1, sp, PT_R1 /* arg1: parent */ @@ -134,6 +138,7 @@ SYM_CODE_START(ftrace_graph_caller) SYM_CODE_END(ftrace_graph_caller) SYM_CODE_START(return_to_handler) + UNWIND_HINT_UNDEFINED /* Save return value regs */ PTR_ADDI sp, sp, -FGRET_REGS_SIZE PTR_S a0, sp, FGRET_REGS_A0 @@ -155,6 +160,7 @@ SYM_CODE_END(return_to_handler) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS SYM_CODE_START(ftrace_stub_direct_tramp) + UNWIND_HINT_UNDEFINED jr t0 SYM_CODE_END(ftrace_stub_direct_tramp) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index b13b2858fe39..c7d0338d12c1 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -20,6 +20,7 @@ #include #include #include +#include static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { @@ -515,15 +516,28 @@ static void module_init_ftrace_plt(const Elf_Ehdr *hdr, int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - const Elf_Shdr *s, *se; const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *ftrace = NULL; - for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".altinstructions", secstrs + s->sh_name)) - apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + alt = s; + if (!strcmp(".orc_unwind", secstrs + s->sh_name)) + orc = s; + if (!strcmp(".orc_unwind_ip", secstrs + s->sh_name)) + orc_ip = s; if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name)) - module_init_ftrace_plt(hdr, s, mod); + ftrace = s; } + if (alt) + apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size); + + if (orc && orc_ip) + unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size); + + if (ftrace) + module_init_ftrace_plt(hdr, ftrace, mod); + return 0; } diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S index f49f6b053763..84e6de2fd973 100644 --- a/arch/loongarch/kernel/relocate_kernel.S +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -15,6 +15,7 @@ #include SYM_CODE_START(relocate_new_kernel) + UNWIND_HINT_UNDEFINED /* * a0: EFI boot flag for the new kernel * a1: Command line pointer for the new kernel @@ -90,6 +91,7 @@ SYM_CODE_END(relocate_new_kernel) * then start at the entry point from LOONGARCH_IOCSR_MBUF0. */ SYM_CODE_START(kexec_smp_wait) + UNWIND_HINT_UNDEFINED 1: li.w t0, 0x100 /* wait for init loop */ 2: addi.w t0, t0, -1 /* limit mailbox access */ bnez t0, 2b @@ -106,6 +108,5 @@ SYM_CODE_END(kexec_smp_wait) relocate_new_kernel_end: -SYM_DATA_START(relocate_new_kernel_size) - PTR relocate_new_kernel_end - relocate_new_kernel -SYM_DATA_END(relocate_new_kernel_size) + .section ".data" +SYM_DATA(relocate_new_kernel_size, .long relocate_new_kernel_end - relocate_new_kernel) diff --git a/arch/loongarch/kernel/rethook_trampoline.S b/arch/loongarch/kernel/rethook_trampoline.S index bd5772c96338..d4ceb2fa2a5c 100644 --- a/arch/loongarch/kernel/rethook_trampoline.S +++ b/arch/loongarch/kernel/rethook_trampoline.S @@ -76,6 +76,7 @@ .endm SYM_CODE_START(arch_rethook_trampoline) + UNWIND_HINT_UNDEFINED addi.d sp, sp, -PT_SIZE save_all_base_regs diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 634ef17fd38b..7bf9afaeea00 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -47,6 +47,7 @@ #include #include #include +#include #define SMBIOS_BIOSSIZE_OFFSET 0x09 #define SMBIOS_BIOSEXTERN_OFFSET 0x13 @@ -587,6 +588,7 @@ static void __init prefill_possible_map(void) void __init setup_arch(char **cmdline_p) { cpu_probe(); + unwind_init(); init_environ(); efi_init(); diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index f623feb2129f..eaec82e02c92 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -29,6 +29,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, regs->csr_era = thread_saved_ra(task); } regs->regs[1] = 0; + regs->regs[22] = 0; } for (unwind_start(&state, task, regs); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index aebfc3733a76..f9f4eb00c92e 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -53,6 +53,32 @@ #include "access-helper.h" +void *exception_table[EXCCODE_INT_START] = { + [0 ... EXCCODE_INT_START - 1] = handle_reserved, + + [EXCCODE_TLBI] = handle_tlb_load, + [EXCCODE_TLBL] = handle_tlb_load, + [EXCCODE_TLBS] = handle_tlb_store, + [EXCCODE_TLBM] = handle_tlb_modify, + [EXCCODE_TLBNR] = handle_tlb_protect, + [EXCCODE_TLBNX] = handle_tlb_protect, + [EXCCODE_TLBPE] = handle_tlb_protect, + [EXCCODE_ADE] = handle_ade, + [EXCCODE_ALE] = handle_ale, + [EXCCODE_BCE] = handle_bce, + [EXCCODE_SYS] = handle_sys, + [EXCCODE_BP] = handle_bp, + [EXCCODE_INE] = handle_ri, + [EXCCODE_IPE] = handle_ri, + [EXCCODE_FPDIS] = handle_fpu, + [EXCCODE_LSXDIS] = handle_lsx, + [EXCCODE_LASXDIS] = handle_lasx, + [EXCCODE_FPE] = handle_fpe, + [EXCCODE_WATCH] = handle_watch, + [EXCCODE_BTDIS] = handle_lbt, +}; +EXPORT_SYMBOL_GPL(exception_table); + static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { @@ -1150,19 +1176,9 @@ void __init trap_init(void) for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++) set_handler(i * VECSIZE, handle_vint, VECSIZE); - set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); - set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); - set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE); - set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); - set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); - set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE); - set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE); - set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE); - set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE); - set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE); - set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE); + /* Set exception vector handler */ + for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); cache_error_setup(); diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c new file mode 100644 index 000000000000..b25722876331 --- /dev/null +++ b/arch/loongarch/kernel/unwind_orc.c @@ -0,0 +1,528 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +ORC_HEADER; + +#define orc_warn(fmt, ...) \ + printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) + +extern int __start_orc_unwind_ip[]; +extern int __stop_orc_unwind_ip[]; +extern struct orc_entry __start_orc_unwind[]; +extern struct orc_entry __stop_orc_unwind[]; + +static bool orc_init __ro_after_init; +static unsigned int lookup_num_blocks __ro_after_init; + +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .sp_reg = ORC_REG_FP, + .sp_offset = 16, + .fp_reg = ORC_REG_PREV_SP, + .fp_offset = -16, + .ra_reg = ORC_REG_PREV_SP, + .ra_offset = -8, + .type = ORC_TYPE_CALL +}; + +/* + * If we crash with IP==0, the last successfully executed instruction + * was probably an indirect function call with a NULL function pointer, + * and we don't have unwind information for NULL. + * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function + * pointer into its parent and then continue normally from there. + */ +static struct orc_entry orc_null_entry = { + .sp_reg = ORC_REG_SP, + .sp_offset = sizeof(long), + .fp_reg = ORC_REG_UNDEFINED, + .type = ORC_TYPE_CALL +}; + +static inline unsigned long orc_ip(const int *ip) +{ + return (unsigned long)ip + *ip; +} + +static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table, + unsigned int num_entries, unsigned long ip) +{ + int *first = ip_table; + int *mid = first, *found = first; + int *last = ip_table + num_entries - 1; + + if (!num_entries) + return NULL; + + /* + * Do a binary range search to find the rightmost duplicate of a given + * starting address. Some entries are section terminators which are + * "weak" entries for ensuring there are no gaps. They should be + * ignored when they conflict with a real entry. + */ + while (first <= last) { + mid = first + ((last - first) / 2); + + if (orc_ip(mid) <= ip) { + found = mid; + first = mid + 1; + } else + last = mid - 1; + } + + return u_table + (found - ip_table); +} + +#ifdef CONFIG_MODULES +static struct orc_entry *orc_module_find(unsigned long ip) +{ + struct module *mod; + + mod = __module_address(ip); + if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip) + return NULL; + + return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, mod->arch.num_orcs, ip); +} +#else +static struct orc_entry *orc_module_find(unsigned long ip) +{ + return NULL; +} +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +static struct orc_entry *orc_find(unsigned long ip); + +/* + * Ftrace dynamic trampolines do not have orc entries of their own. + * But they are copies of the ftrace entries that are static and + * defined in ftrace_*.S, which do have orc entries. + * + * If the unwinder comes across a ftrace trampoline, then find the + * ftrace function that was used to create it, and use that ftrace + * function's orc entry, as the placement of the return code in + * the stack will be identical. + */ +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + struct ftrace_ops *ops; + unsigned long tramp_addr, offset; + + ops = ftrace_ops_trampoline(ip); + if (!ops) + return NULL; + + /* Set tramp_addr to the start of the code copied by the trampoline */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + tramp_addr = (unsigned long)ftrace_regs_caller; + else + tramp_addr = (unsigned long)ftrace_caller; + + /* Now place tramp_addr to the location within the trampoline ip is at */ + offset = ip - ops->trampoline; + tramp_addr += offset; + + /* Prevent unlikely recursion */ + if (ip == tramp_addr) + return NULL; + + return orc_find(tramp_addr); +} +#else +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + return NULL; +} +#endif + +static struct orc_entry *orc_find(unsigned long ip) +{ + static struct orc_entry *orc; + + if (ip == 0) + return &orc_null_entry; + + /* For non-init vmlinux addresses, use the fast lookup table: */ + if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { + unsigned int idx, start, stop; + + idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; + + if (unlikely((idx >= lookup_num_blocks-1))) { + orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n", + idx, lookup_num_blocks, (void *)ip); + return NULL; + } + + start = orc_lookup[idx]; + stop = orc_lookup[idx + 1] + 1; + + if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || + (__start_orc_unwind + stop > __stop_orc_unwind))) { + orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n", + idx, lookup_num_blocks, start, stop, (void *)ip); + return NULL; + } + + return __orc_find(__start_orc_unwind_ip + start, + __start_orc_unwind + start, stop - start, ip); + } + + /* vmlinux .init slow lookup: */ + if (is_kernel_inittext(ip)) + return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); + + /* Module lookup: */ + orc = orc_module_find(ip); + if (orc) + return orc; + + return orc_ftrace_find(ip); +} + +#ifdef CONFIG_MODULES + +static DEFINE_MUTEX(sort_mutex); +static int *cur_orc_ip_table = __start_orc_unwind_ip; +static struct orc_entry *cur_orc_table = __start_orc_unwind; + +static void orc_sort_swap(void *_a, void *_b, int size) +{ + int delta = _b - _a; + int *a = _a, *b = _b, tmp; + struct orc_entry *orc_a, *orc_b; + + /* Swap the .orc_unwind_ip entries: */ + tmp = *a; + *a = *b + delta; + *b = tmp - delta; + + /* Swap the corresponding .orc_unwind entries: */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + orc_b = cur_orc_table + (b - cur_orc_ip_table); + swap(*orc_a, *orc_b); +} + +static int orc_sort_cmp(const void *_a, const void *_b) +{ + const int *a = _a, *b = _b; + unsigned long a_val = orc_ip(a); + unsigned long b_val = orc_ip(b); + struct orc_entry *orc_a; + + if (a_val > b_val) + return 1; + if (a_val < b_val) + return -1; + + /* + * The "weak" section terminator entries need to always be first + * to ensure the lookup code skips them in favor of real entries. + * These terminator entries exist to handle any gaps created by + * whitelisted .o files which didn't get objtool generation. + */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + + return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1; +} + +void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, + void *_orc, size_t orc_size) +{ + int *orc_ip = _orc_ip; + struct orc_entry *orc = _orc; + unsigned int num_entries = orc_ip_size / sizeof(int); + + WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(*orc) != 0 || + num_entries != orc_size / sizeof(*orc)); + + /* + * The 'cur_orc_*' globals allow the orc_sort_swap() callback to + * associate an .orc_unwind_ip table entry with its corresponding + * .orc_unwind entry so they can both be swapped. + */ + mutex_lock(&sort_mutex); + cur_orc_ip_table = orc_ip; + cur_orc_table = orc; + sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap); + mutex_unlock(&sort_mutex); + + mod->arch.orc_unwind_ip = orc_ip; + mod->arch.orc_unwind = orc; + mod->arch.num_orcs = num_entries; +} +#endif + +void __init unwind_init(void) +{ + int i; + size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind; + size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip; + size_t num_entries = orc_ip_size / sizeof(int); + struct orc_entry *orc; + + if (!num_entries || orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(struct orc_entry) != 0 || + num_entries != orc_size / sizeof(struct orc_entry)) { + orc_warn("WARNING: Bad or missing .orc_unwind table. Disabling unwinder.\n"); + return; + } + + /* + * Note, the orc_unwind and orc_unwind_ip tables were already + * sorted at build time via the 'sorttable' tool. + * It's ready for binary search straight away, no need to sort it. + */ + + /* Initialize the fast lookup table: */ + lookup_num_blocks = orc_lookup_end - orc_lookup; + for (i = 0; i < lookup_num_blocks-1; i++) { + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i)); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + + orc_lookup[i] = orc - __start_orc_unwind; + } + + /* Initialize the ending block: */ + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind; + + orc_init = true; +} + +static inline bool on_stack(struct stack_info *info, unsigned long addr, size_t len) +{ + unsigned long begin = info->begin; + unsigned long end = info->end; + + return (info->type != STACK_TYPE_UNKNOWN && + addr >= begin && addr < end && addr + len > begin && addr + len <= end); +} + +static bool stack_access_ok(struct unwind_state *state, unsigned long addr, size_t len) +{ + struct stack_info *info = &state->stack_info; + + if (on_stack(info, addr, len)) + return true; + + return !get_stack_info(addr, state->task, info) && on_stack(info, addr, len); +} + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + return __unwind_get_return_address(state); +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs) +{ + __unwind_start(state, task, regs); + state->type = UNWINDER_ORC; + if (!unwind_done(state) && !__kernel_text_address(state->pc)) + unwind_next_frame(state); +} +EXPORT_SYMBOL_GPL(unwind_start); + +static bool is_entry_func(unsigned long addr) +{ + extern u32 kernel_entry; + extern u32 kernel_entry_end; + + return addr >= (unsigned long)&kernel_entry && addr < (unsigned long)&kernel_entry_end; +} + +static inline unsigned long bt_address(unsigned long ra) +{ + extern unsigned long eentry; + + if (__kernel_text_address(ra)) + return ra; + + if (__module_text_address(ra)) + return ra; + + if (ra >= eentry && ra < eentry + EXCCODE_INT_END * VECSIZE) { + unsigned long func; + unsigned long type = (ra - eentry) / VECSIZE; + unsigned long offset = (ra - eentry) % VECSIZE; + + switch (type) { + case 0 ... EXCCODE_INT_START - 1: + func = (unsigned long)exception_table[type]; + break; + case EXCCODE_INT_START ... EXCCODE_INT_END: + func = (unsigned long)handle_vint; + break; + default: + func = (unsigned long)handle_reserved; + break; + } + + return func + offset; + } + + return ra; +} + +bool unwind_next_frame(struct unwind_state *state) +{ + unsigned long *p, pc; + struct pt_regs *regs; + struct orc_entry *orc; + struct stack_info *info = &state->stack_info; + + if (unwind_done(state)) + return false; + + /* Don't let modules unload while we're reading their ORC data. */ + preempt_disable(); + + if (is_entry_func(state->pc)) + goto end; + + orc = orc_find(state->pc); + if (!orc) { + /* + * As a fallback, try to assume this code uses a frame pointer. + * This is useful for generated code, like BPF, which ORC + * doesn't know about. This is just a guess, so the rest of + * the unwind is no longer considered reliable. + */ + orc = &orc_fp_entry; + state->error = true; + } else { + if (orc->type == ORC_TYPE_UNDEFINED) + goto err; + + if (orc->type == ORC_TYPE_END_OF_STACK) + goto end; + } + + switch (orc->sp_reg) { + case ORC_REG_SP: + if (info->type == STACK_TYPE_IRQ && state->sp == info->end) + orc->type = ORC_TYPE_REGS; + else + state->sp = state->sp + orc->sp_offset; + break; + case ORC_REG_FP: + state->sp = state->fp; + break; + default: + orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->pc); + goto err; + } + + switch (orc->fp_reg) { + case ORC_REG_PREV_SP: + p = (unsigned long *)(state->sp + orc->fp_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + state->fp = *p; + break; + case ORC_REG_UNDEFINED: + /* Nothing. */ + break; + default: + orc_warn("unknown FP base reg %d at %pB\n", orc->fp_reg, (void *)state->pc); + goto err; + } + + switch (orc->type) { + case ORC_TYPE_CALL: + if (orc->ra_reg == ORC_REG_PREV_SP) { + p = (unsigned long *)(state->sp + orc->ra_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + pc = unwind_graph_addr(state, *p, state->sp); + pc -= LOONGARCH_INSN_SIZE; + } else if (orc->ra_reg == ORC_REG_UNDEFINED) { + if (!state->ra || state->ra == state->pc) + goto err; + + pc = unwind_graph_addr(state, state->ra, state->sp); + pc -= LOONGARCH_INSN_SIZE; + state->ra = 0; + } else { + orc_warn("unknown ra base reg %d at %pB\n", orc->ra_reg, (void *)state->pc); + goto err; + } + break; + case ORC_TYPE_REGS: + if (info->type == STACK_TYPE_IRQ && state->sp == info->end) + regs = (struct pt_regs *)info->next_sp; + else + regs = (struct pt_regs *)state->sp; + + if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs))) + goto err; + + if ((info->end == (unsigned long)regs + sizeof(*regs)) && + !regs->regs[3] && !regs->regs[1]) + goto end; + + if (user_mode(regs)) + goto end; + + pc = regs->csr_era; + if (!__kernel_text_address(pc)) + goto err; + + state->sp = regs->regs[3]; + state->ra = regs->regs[1]; + state->fp = regs->regs[22]; + get_stack_info(state->sp, state->task, info); + + break; + default: + orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)state->pc); + goto err; + } + + state->pc = bt_address(pc); + if (!state->pc) { + pr_err("cannot find unwind pc at %pK\n", (void *)pc); + goto err; + } + + if (!__kernel_text_address(state->pc)) + goto err; + + preempt_enable(); + return true; + +err: + state->error = true; + +end: + preempt_enable(); + state->stack_info.type = STACK_TYPE_UNKNOWN; + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index a5d0cd2035da..e8e97dbf9ca4 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -2,6 +2,7 @@ #include #include #include +#include #define PAGE_SIZE _PAGE_SIZE #define RO_EXCEPTION_TABLE_ALIGN 4 @@ -122,6 +123,8 @@ SECTIONS } #endif + ORC_UNWIND_TABLE + .sdata : { *(.sdata) } diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index ba976509bfe8..1fcc4b7eda32 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -8,7 +8,7 @@ #include #include #include -#include +#include #define HGPR_OFFSET(x) (PT_R0 + 8*x) #define GGPR_OFFSET(x) (KVM_ARCH_GGPR + 8*x) @@ -112,6 +112,7 @@ .text .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) + UNWIND_HINT_UNDEFINED csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS addi.d a2, a2, KVM_VCPU_ARCH @@ -279,3 +280,9 @@ SYM_FUNC_END(kvm_restore_lasx) .section ".rodata" SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) + +#ifdef CONFIG_CPU_HAS_LBT +STACK_FRAME_NON_STANDARD kvm_restore_fpu +STACK_FRAME_NON_STANDARD kvm_restore_lsx +STACK_FRAME_NON_STANDARD kvm_restore_lasx +#endif diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index be741544e62b..7a0db643b286 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -10,6 +10,7 @@ #include #include #include +#include SYM_FUNC_START(__clear_user) /* @@ -204,3 +205,5 @@ SYM_FUNC_START(__clear_user_fast) _asm_extable 28b, .Lsmall_fixup _asm_extable 29b, .Lexit SYM_FUNC_END(__clear_user_fast) + +STACK_FRAME_NON_STANDARD __clear_user_fast diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index feec3d362803..095ce9181c6c 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -10,6 +10,7 @@ #include #include #include +#include SYM_FUNC_START(__copy_user) /* @@ -278,3 +279,5 @@ SYM_FUNC_START(__copy_user_fast) _asm_extable 58b, .Lexit _asm_extable 59b, .Lexit SYM_FUNC_END(__copy_user_fast) + +STACK_FRAME_NON_STANDARD __copy_user_fast diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index fa1148878d2b..9517a2f961af 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -9,6 +9,7 @@ #include #include #include +#include .section .noinstr.text, "ax" @@ -197,3 +198,5 @@ SYM_FUNC_START(__memcpy_fast) jr ra SYM_FUNC_END(__memcpy_fast) _ASM_NOKPROBE(__memcpy_fast) + +STACK_FRAME_NON_STANDARD __memcpy_small diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 06d3ca54cbfe..df3846620553 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -9,6 +9,7 @@ #include #include #include +#include .macro fill_to_64 r0 bstrins.d \r0, \r0, 15, 8 @@ -166,3 +167,5 @@ SYM_FUNC_START(__memset_fast) jr ra SYM_FUNC_END(__memset_fast) _ASM_NOKPROBE(__memset_fast) + +STACK_FRAME_NON_STANDARD __memset_fast diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 0b95d32b30c9..5ac9beb5f093 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -9,8 +9,9 @@ #include #include -#include #include +#include +#include #include #include #include @@ -266,24 +267,20 @@ static void setup_tlb_handler(int cpu) setup_ptwalker(); local_flush_tlb_all(); + if (cpu_has_ptw) { + exception_table[EXCCODE_TLBI] = handle_tlb_load_ptw; + exception_table[EXCCODE_TLBL] = handle_tlb_load_ptw; + exception_table[EXCCODE_TLBS] = handle_tlb_store_ptw; + exception_table[EXCCODE_TLBM] = handle_tlb_modify_ptw; + } + /* The tlb handlers are generated only once */ if (cpu == 0) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); - if (!cpu_has_ptw) { - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); - } else { - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE); - } - set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); + + for (int i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); } else { int vec_sz __maybe_unused; void *addr __maybe_unused; diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index d5d682f3d29f..a44387b838af 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -18,6 +18,7 @@ .macro tlb_do_page_fault, write SYM_CODE_START(tlb_do_page_fault_\write) + UNWIND_HINT_UNDEFINED SAVE_ALL csrrd a2, LOONGARCH_CSR_BADV move a0, sp @@ -32,6 +33,7 @@ tlb_do_page_fault 1 SYM_CODE_START(handle_tlb_protect) + UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL move a0, sp @@ -44,6 +46,7 @@ SYM_CODE_START(handle_tlb_protect) SYM_CODE_END(handle_tlb_protect) SYM_CODE_START(handle_tlb_load) + UNWIND_HINT_UNDEFINED csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -190,6 +193,7 @@ nopage_tlb_load: SYM_CODE_END(handle_tlb_load) SYM_CODE_START(handle_tlb_load_ptw) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_0 @@ -197,6 +201,7 @@ SYM_CODE_START(handle_tlb_load_ptw) SYM_CODE_END(handle_tlb_load_ptw) SYM_CODE_START(handle_tlb_store) + UNWIND_HINT_UNDEFINED csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -346,6 +351,7 @@ nopage_tlb_store: SYM_CODE_END(handle_tlb_store) SYM_CODE_START(handle_tlb_store_ptw) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_1 @@ -353,6 +359,7 @@ SYM_CODE_START(handle_tlb_store_ptw) SYM_CODE_END(handle_tlb_store_ptw) SYM_CODE_START(handle_tlb_modify) + UNWIND_HINT_UNDEFINED csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -500,6 +507,7 @@ nopage_tlb_modify: SYM_CODE_END(handle_tlb_modify) SYM_CODE_START(handle_tlb_modify_ptw) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la_abs t0, tlb_do_page_fault_1 @@ -507,6 +515,7 @@ SYM_CODE_START(handle_tlb_modify_ptw) SYM_CODE_END(handle_tlb_modify_ptw) SYM_CODE_START(handle_tlb_refill) + UNWIND_HINT_UNDEFINED csrwr t0, LOONGARCH_CSR_TLBRSAVE csrrd t0, LOONGARCH_CSR_PGD lddir t0, t0, 3 diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index f597cd08a96b..75c6726382c3 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -4,6 +4,7 @@ KASAN_SANITIZE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n +OBJECT_FILES_NON_STANDARD := y # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile -- cgit v1.2.3-70-g09d2 From 199cc14cb4f1cb8668be45f67af41755ed5f0175 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Mon, 11 Mar 2024 22:23:47 +0800 Subject: LoongArch: Add kernel livepatching support The arch-specified function ftrace_regs_set_instruction_pointer() has been implemented in arch/loongarch/include/asm/ftrace.h, so here only implement arch_stack_walk_reliable() function. Here are the test logs: [root@linux fedora]# cat /proc/cmdline BOOT_IMAGE=/vmlinuz-6.8.0-rc2 root=/dev/sda3 [root@linux fedora]# modprobe livepatch-sample [root@linux fedora]# cat /proc/cmdline this has been live patched [root@linux fedora]# echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled [root@linux fedora]# rmmod livepatch_sample [root@linux fedora]# cat /proc/cmdline BOOT_IMAGE=/vmlinuz-6.8.0-rc2 root=/dev/sda3 [root@linux fedora]# dmesg -t | tail -5 livepatch: enabling patch 'livepatch_sample' livepatch: 'livepatch_sample': starting patching transition livepatch: 'livepatch_sample': patching complete livepatch: 'livepatch_sample': starting unpatching transition livepatch: 'livepatch_sample': unpatching complete Signed-off-by: Jinyang He Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 4 ++++ arch/loongarch/include/asm/thread_info.h | 2 ++ arch/loongarch/kernel/stacktrace.c | 40 ++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 8d6725115ac6..99a0a15ce5f7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -134,6 +134,7 @@ config LOONGARCH select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_KVM + select HAVE_LIVEPATCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS @@ -143,6 +144,7 @@ config LOONGARCH select HAVE_PERF_USER_STACK_DUMP select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC select HAVE_RETHOOK select HAVE_RSEQ select HAVE_RUST @@ -636,6 +638,8 @@ config RANDOMIZE_BASE_MAX_OFFSET This is limited by the size of the lower address memory, 256MB. +source "kernel/livepatch/Kconfig" + endmenu config ARCH_SELECT_MEMORY_MODEL diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 8cb653d49a54..8bf0e6f51546 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -86,6 +86,7 @@ register unsigned long current_stack_pointer __asm__("$sp"); #define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */ #define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */ #define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */ +#define TIF_PATCH_PENDING 21 /* pending live patching update */ #define _TIF_SIGPENDING (1<regs[3] = (unsigned long)__builtin_frame_address(0); + regs->csr_era = (unsigned long)__builtin_return_address(0); + } else { + regs->regs[3] = thread_saved_fp(task); + regs->csr_era = thread_saved_ra(task); + } + regs->regs[1] = 0; + regs->regs[22] = 0; + + for (unwind_start(&state, task, regs); + !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + + /* + * A NULL or invalid return address probably means there's some + * generated code which __kernel_text_address() doesn't know about. + */ + if (!addr) + return -EINVAL; + + if (!consume_entry(cookie, addr)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + + return 0; +} + static int copy_stack_frame(unsigned long fp, struct stack_frame *frame) { -- cgit v1.2.3-70-g09d2 From 8b5db5e5337ecb0a7954c39020861867a85b6206 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:27 +0800 Subject: LoongArch: Select ARCH_HAS_CURRENT_STACK_POINTER in Kconfig LoongArch has implemented the current_stack_pointer macro, so select ARCH_HAS_CURRENT_STACK_POINTER in Kconfig. This will let it be used in non-arch places (like HARDENED_USERCOPY). Reviewed-by: Guo Ren Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 99a0a15ce5f7..6df2b421895a 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -15,6 +15,7 @@ config LOONGARCH select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS -- cgit v1.2.3-70-g09d2 From f48ad26e5e57016b447461f22ecf7c3ed8337353 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: LoongArch: Select HAVE_ARCH_USERFAULTFD_MINOR in Kconfig This allocates the VM flag needed to support the userfaultfd minor fault functionality. See commit 7677f7fd8be7665 ("userfaultfd: add minor fault registration mode") for more information. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 6df2b421895a..526a88598fbf 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -105,6 +105,7 @@ config LOONGARCH select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT -- cgit v1.2.3-70-g09d2 From c87e12e0e8c1241410e758e181ca6bf23efa5b5b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: LoongArch: Change __my_cpu_offset definition to avoid mis-optimization From GCC commit 3f13154553f8546a ("df-scan: remove ad-hoc handling of global regs in asms"), global registers will no longer be forced to add to the def-use chain. Then current_thread_info(), current_stack_pointer and __my_cpu_offset may be lifted out of the loop because they are no longer treated as "volatile variables". This optimization is still correct for the current_thread_info() and current_stack_pointer usages because they are associated to a thread. However it is wrong for __my_cpu_offset because it is associated to a CPU rather than a thread: if the thread migrates to a different CPU in the loop, __my_cpu_offset should be changed. Change __my_cpu_offset definition to treat it as a "volatile variable", in order to avoid such a mis-optimization. Cc: stable@vger.kernel.org Reported-by: Xiaotian Wu Reported-by: Miao Wang Signed-off-by: Xing Li Signed-off-by: Hongchen Zhang Signed-off-by: Rui Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/percpu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 9b36ac003f89..8f290e5546cf 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -29,7 +29,12 @@ static inline void set_my_cpu_offset(unsigned long off) __my_cpu_offset = off; csr_write64(off, PERCPU_BASE_KS); } -#define __my_cpu_offset __my_cpu_offset + +#define __my_cpu_offset \ +({ \ + __asm__ __volatile__("":"+r"(__my_cpu_offset)); \ + __my_cpu_offset; \ +}) #define PERCPU_OP(op, asm_op, c_op) \ static __always_inline unsigned long __percpu_##op(void *ptr, \ -- cgit v1.2.3-70-g09d2 From d42ab9af605ee406ec339e5e80a1c3a708637fd6 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: LoongArch: Move {dmw,tlb}_virt_to_page() definition to page.h These two functions are implemented in pgtable.c, and they are needed only by the virt_to_page() macro in page.h. Having the prototypes in pgtable.h causes a circular dependency between page.h and pgtable.h, because the virt_to_page() macro in page.h needs pgtable.h for these two functions, while pgtable.h needs various definitions from page.h (e.g. pte_t and pgt_t). Let's avoid this circular dependency by moving the function prototypes to page.h. Signed-off-by: Max Kellermann Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/page.h | 3 +++ arch/loongarch/include/asm/pgtable.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 63f137ce82a4..2391fcd18908 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -83,6 +83,9 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) +struct page *dmw_virt_to_page(unsigned long kaddr); +struct page *tlb_virt_to_page(unsigned long kaddr); + #define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) #define virt_to_page(kaddr) \ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 8b5df1bbf9e9..af3acdf3481a 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -363,9 +363,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt extern pgd_t swapper_pg_dir[]; extern pgd_t invalid_pg_dir[]; -struct page *dmw_virt_to_page(unsigned long kaddr); -struct page *tlb_virt_to_page(unsigned long kaddr); - /* * The following only work if pte_present() is true. * Undefined behaviour if not.. -- cgit v1.2.3-70-g09d2 From 82bf60a6fed806d57e284a1fb40dbc1ad5097611 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: LoongArch: Remove superfluous flush_dcache_page() definition LoongArch doesn't have cache aliases, so flush_dcache_page() is a no-op. There is a generic implementation for this case in include/asm-generic/ cacheflush.h. So remove the superfluous flush_dcache_page() definition, which also silences such build warnings: In file included from crypto/scompress.c:12: include/crypto/scatterwalk.h: In function 'scatterwalk_pagedone': include/crypto/scatterwalk.h:76:30: warning: variable 'page' set but not used [-Wunused-but-set-variable] 76 | struct page *page; | ^~~~ crypto/scompress.c: In function 'scomp_acomp_comp_decomp': >> crypto/scompress.c:174:38: warning: unused variable 'dst_page' [-Wunused-variable] 174 | struct page *dst_page = sg_page(req->dst); | Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403091614.NeUw5zcv-lkp@intel.com/ Suggested-by: Barry Song Acked-by: Barry Song Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cacheflush.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index 80bd74106985..f8754d08a31a 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ b/arch/loongarch/include/asm/cacheflush.h @@ -37,8 +37,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 - #define flush_cache_all() do { } while (0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_dup_mm(mm) do { } while (0) @@ -47,7 +45,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) #define flush_icache_user_page(vma, page, addr, len) do { } while (0) -#define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -- cgit v1.2.3-70-g09d2 From 9c68ece8b2a5c5ff9b2fcaea923dd73efeb174cd Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: LoongArch: Define the __io_aw() hook as mmiowb() Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of mmiowb()") remove all mmiowb() in drivers, but it says: "NOTE: mmiowb() has only ever guaranteed ordering in conjunction with spin_unlock(). However, pairing each mmiowb() removal in this patch with the corresponding call to spin_unlock() is not at all trivial, so there is a small chance that this change may regress any drivers incorrectly relying on mmiowb() to order MMIO writes between CPUs using lock-free synchronisation." The mmio in radeon_ring_commit() is protected by a mutex rather than a spinlock, but in the mutex fastpath it behaves similar to spinlock. We can add mmiowb() calls in the radeon driver but the maintainer says he doesn't like such a workaround, and radeon is not the only example of mutex protected mmio. So we should extend the mmiowb tracking system from spinlock to mutex, and maybe other locking primitives. This is not easy and error prone, so we solve it in the architectural code, by simply defining the __io_aw() hook as mmiowb(). And we no longer need to override queued_spin_unlock() so use the generic definition. Without this, we get such an error when run 'glxgears' on weak ordering architectures such as LoongArch: radeon 0000:04:00.0: ring 0 stalled for more than 10324msec radeon 0000:04:00.0: ring 3 stalled for more than 10240msec radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3) radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.com/T/#t Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loongson.cn/T/#t Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/Kbuild | 1 + arch/loongarch/include/asm/io.h | 2 ++ arch/loongarch/include/asm/qspinlock.h | 18 ------------------ 3 files changed, 3 insertions(+), 18 deletions(-) delete mode 100644 arch/loongarch/include/asm/qspinlock.h (limited to 'arch') diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index a97c0edbb866..2dbec7853ae8 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h +generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index c486c2341b66..4a8adcca329b 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l)) #define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l)) +#define __io_aw() mmiowb() + #include #define ARCH_HAS_VALID_PHYS_ADDR_RANGE diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h deleted file mode 100644 index 34f43f8ad591..000000000000 --- a/arch/loongarch/include/asm/qspinlock.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_QSPINLOCK_H -#define _ASM_QSPINLOCK_H - -#include - -#define queued_spin_unlock queued_spin_unlock - -static inline void queued_spin_unlock(struct qspinlock *lock) -{ - compiletime_assert_atomic_type(lock->locked); - c_sync(); - WRITE_ONCE(lock->locked, 0); -} - -#include - -#endif /* _ASM_QSPINLOCK_H */ -- cgit v1.2.3-70-g09d2 From fea1c949f6ca5059e12de00d0483645debc5b206 Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Tue, 19 Mar 2024 15:50:34 +0800 Subject: LoongArch/crypto: Clean up useless assignment operations The LoongArch CRC32 hw acceleration is based on arch/mips/crypto/ crc32-mips.c. While the MIPS code supports both MIPS32 and MIPS64, but LoongArch32 lacks the CRC instruction. As a result, the line "len -= sizeof(u32)" is unnecessary. Removing it can make context code style more unified and improve code readability. Cc: stable@vger.kernel.org Reviewed-by: WANG Xuerui Suggested-by: Wentao Guan Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/crypto/crc32-loongarch.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c index a49e507af38c..3eebea3a7b47 100644 --- a/arch/loongarch/crypto/crc32-loongarch.c +++ b/arch/loongarch/crypto/crc32-loongarch.c @@ -44,7 +44,6 @@ static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) CRC32(crc, value, w); p += sizeof(u32); - len -= sizeof(u32); } if (len & sizeof(u16)) { @@ -80,7 +79,6 @@ static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) CRC32C(crc, value, w); p += sizeof(u32); - len -= sizeof(u32); } if (len & sizeof(u16)) { -- cgit v1.2.3-70-g09d2