From ab7fa6b05ebbe2a8cc07114014f14fd2326fb80a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 May 2023 00:54:59 +0800 Subject: riscv: move options to keep entries sorted Recently, some commits break the entries order. Properly move their locations to keep entries sorted. Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Acked-by: Guo Ren Tested-by: Nick Desaulniers # build Link: https://lore.kernel.org/r/20230523165502.2592-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..8f55aa4aae34 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -101,6 +101,11 @@ config RISCV select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION @@ -110,7 +115,6 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL - select HAVE_RETHOOK if !XIP_KERNEL select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI @@ -119,6 +123,7 @@ config RISCV select HAVE_PERF_USER_STACK_DUMP select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RETHOOK if !XIP_KERNEL select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -142,11 +147,6 @@ config RISCV select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT - select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) - select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION config CLANG_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_CLANG -- cgit v1.2.3-70-g09d2 From cead443a306262a16056d84d63b5a6a10908eb62 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 May 2023 00:55:00 +0800 Subject: riscv: vmlinux-xip.lds.S: remove .alternative section ALTERNATIVE mechanism can't work on XIP, and this is also reflected by below Kconfig dependency: RISCV_ALTERNATIVE ... depends on !XIP_KERNEL ... So there's no .alternative section at all for XIP case, remove it. Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Reviewed-by: Guo Ren Tested-by: Nick Desaulniers # build Link: https://lore.kernel.org/r/20230523165502.2592-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vmlinux-xip.lds.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index eab9edc3b631..50767647fbc6 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -98,12 +98,6 @@ SECTIONS __soc_builtin_dtb_table_end = .; } - . = ALIGN(8); - .alternative : { - __alt_start = .; - *(.alternative) - __alt_end = .; - } __init_end = .; . = ALIGN(16); -- cgit v1.2.3-70-g09d2 From c828856b51bb4180c0803c12ffaeb86c41336c11 Mon Sep 17 00:00:00 2001 From: Zhangjin Wu Date: Wed, 24 May 2023 00:55:02 +0800 Subject: riscv: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION Select CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION for RISC-V, allowing the user to enable dead code elimination. In order for this to work, ensure that we keep the alternative table by annotating them with KEEP. This boots well on qemu with both rv32_defconfig & rv64 defconfig, but it only shrinks their builds by ~1%, a smaller config is thereforce customized to test this feature: | rv32 | rv64 --------|------------------------|--------------------- No DCE | 4460684 | 4893488 DCE | 3986716 | 4376400 Shrink | 473968 (~10.6%) | 517088 (~10.5%) The config used above only reserves necessary options to boot on qemu with serial console, more like the size-critical embedded scenes: - rv64 config: https://pastebin.com/crz82T0s - rv32 config: rv64 config + 32-bit.config Here is Jisheng's original commit-msg: When trying to run linux with various opensource riscv core on resource limited FPGA platforms, for example, those FPGAs with less than 16MB SDRAM, I want to save mem as much as possible. One of the major technologies is kernel size optimizations, I found that riscv does not currently support HAVE_LD_DEAD_CODE_DATA_ELIMINATION, which passes -fdata-sections, -ffunction-sections to CFLAGS and passes the --gc-sections flag to the linker. This not only benefits my case on FPGA but also benefits defconfigs. Here are some notable improvements from enabling this with defconfigs: nommu_k210_defconfig: text data bss dec hex 1112009 410288 59837 1582134 182436 before 962838 376656 51285 1390779 1538bb after rv32_defconfig: text data bss dec hex 8804455 2816544 290577 11911576 b5c198 before 8692295 2779872 288977 11761144 b375f8 after defconfig: text data bss dec hex 9438267 3391332 485333 13314932 cb2b74 before 9285914 3350052 483349 13119315 c82f53 after Signed-off-by: Zhangjin Wu Co-developed-by: Jisheng Zhang Signed-off-by: Jisheng Zhang Reviewed-by: Guo Ren Tested-by: Bin Meng Reviewed-by: Kefeng Wang Tested-by: Nick Desaulniers # build Link: https://lore.kernel.org/r/20230523165502.2592-5-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/vmlinux.lds.S | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8f55aa4aae34..62e84fee2cfd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -115,6 +115,7 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index e5f9f4677bbf..492dd4b8f3d6 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -85,11 +85,11 @@ SECTIONS INIT_DATA_SECTION(16) .init.pi : { - *(.init.pi*) + KEEP(*(.init.pi*)) } .init.bss : { - *(.init.bss) /* from the EFI stub */ + KEEP(*(.init.bss*)) /* from the EFI stub */ } .exit.data : { @@ -112,7 +112,7 @@ SECTIONS . = ALIGN(8); .alternative : { __alt_start = .; - *(.alternative) + KEEP(*(.alternative)) __alt_end = .; } __init_end = .; -- cgit v1.2.3-70-g09d2 From f7584322e4fef794b290e5fdb290fa92a925236e Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 23 Jun 2023 10:06:17 -0700 Subject: riscv: disable HAVE_LD_DEAD_CODE_DATA_ELIMINATION for LLD Linking allyesconfig with ld.lld-17 with CONFIG_DEAD_CODE_ELIMINATION=y takes hours. Assuming this is a performance regression that can be fixed, tentatively disable this for now so that allyesconfig builds don't start timing out. If and when there's a fix to ld.lld, this can be converted to a version check instead so that users of older but still supported versions of ld.lld don't hurt themselves by enabling CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y. Link: https://github.com/ClangBuiltLinux/linux/issues/1881 Link: https://lore.kernel.org/linux-riscv/ZJXTwqZIkXLxXaSi@google.com/ Reported-by: Palmer Dabbelt Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 62e84fee2cfd..a8a9387eb284 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -115,7 +115,8 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION + # https://github.com/ClangBuiltLinux/linux/issues/1881 + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI -- cgit v1.2.3-70-g09d2 From e50db34efdc8cac2f17b8f5d32fddd7b58914ce6 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 19 Jun 2023 10:21:01 -0700 Subject: RISC-V: Fix up some vector state related build failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I get a few build failures along the lines of ./arch/riscv/include/uapi/asm/sigcontext.h:19:36: error: field ‘v_state’ has incomplete type 19 | struct __riscv_v_ext_state v_state; | ^~~~~~~ ./arch/riscv/include/uapi/asm/sigcontext.h:32:49: error: field ‘sc_extdesc’ has incomplete type 32 | struct __riscv_extra_ext_header sc_extdesc; The V structures in question are defined for !assembly, so let's just do so for the others. Fixes: 8ee0b41898fa ("riscv: signal: Add sigcontext save/restore for vector") Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230619172101.18692-1-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/sigcontext.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h index 8b8a8541673a..8c8712aa9551 100644 --- a/arch/riscv/include/uapi/asm/sigcontext.h +++ b/arch/riscv/include/uapi/asm/sigcontext.h @@ -15,6 +15,8 @@ /* The size of END signal context header. */ #define END_HDR_SIZE 0x0 +#ifndef __ASSEMBLY__ + struct __sc_riscv_v_state { struct __riscv_v_ext_state v_state; } __attribute__((aligned(16))); @@ -33,4 +35,6 @@ struct sigcontext { }; }; +#endif /*!__ASSEMBLY__*/ + #endif /* _UAPI_ASM_RISCV_SIGCONTEXT_H */ -- cgit v1.2.3-70-g09d2 From 26c38cd802c947401cfbcc285b7d841256b5f17f Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Sun, 25 Jun 2023 15:54:15 +0000 Subject: riscv: vector: only enable interrupts in the first-use trap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function irqentry_exit_to_user_mode() must be called with interrupt disabled. The caller of do_trap_insn_illegal() also assumes running without interrupts. So, we should turn off interrupts after riscv_v_first_use_handler() returns. Fixes: cd054837243b ("riscv: Allocate user's vector context in the first-use trap") Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230625155416.18629-1-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 5158961ea977..bc02b282a403 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -150,12 +150,18 @@ DO_ERROR_INFO(do_trap_insn_fault, asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *regs) { + bool handled; + if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); local_irq_enable(); - if (!riscv_v_first_use_handler(regs)) + handled = riscv_v_first_use_handler(regs); + + local_irq_disable(); + + if (!handled) do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, "Oops - illegal instruction"); -- cgit v1.2.3-70-g09d2 From 75b59f2a90aa7ccac62e3dcb680dfb967b341431 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 27 Jun 2023 01:55:54 +0000 Subject: riscv: vector: clear V-reg in the first-use trap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is no context switch happens after we enable V for a process, then we return to user space with whatever left on the CPU's V registers accessible to the process. The leaked data could belong to another process's V-context saved from last context switch, impacting process's confidentiality on the system. To prevent this from happening, we clear V registers by restoring zero'd V context after turining on V. Fixes: cd054837243b ("riscv: Allocate user's vector context in the first-use trap") Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230627015556.12329-2-andy.chiu@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vector.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index f9c8e19ab301..8d92fb6c522c 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -167,6 +167,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) return true; } riscv_v_vstate_on(regs); + riscv_v_vstate_restore(current, regs); return true; } -- cgit v1.2.3-70-g09d2 From 54cdede08f2f4414629001b124110d656161080a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 16 Jun 2023 12:43:57 +0100 Subject: riscv: vdso: include vdso/vsyscall.h for vdso_data Add include of to pull in the defition of vdso_data to remove the following sparse warning: arch/riscv/kernel/vdso.c:39:18: warning: symbol 'vdso_data' was not declared. Should it be static? Signed-off-by: Ben Dooks Link: https://lore.kernel.org/r/20230616114357.159601-1-ben.dooks@codethink.co.uk Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 9a68e7eaae4d..2cf76218a5bd 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -15,6 +15,7 @@ #include #include #include +#include enum vvar_pages { VVAR_DATA_PAGE_OFFSET, -- cgit v1.2.3-70-g09d2 From c1f048a6bd7d7cb42e9bfd79eff85b33894997fe Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Mon, 29 May 2023 18:15:24 +0800 Subject: riscv: Enable ARCH_SUSPEND_POSSIBLE for s2idle With this configuration opened, the basic platform-independent s2idle is provided by the sole "s2idle" string in `/sys/power/mem_sleep`. At the end of s2idle, harts will hit the `wfi` instruction or enter the SUSPENDED state through the sbi_cpuidle driver. The interrupt of possible wakeup devices will be kept to wake the system up. And platform-specific sleep states can be provided by future ACPI and SBI SUSP extension support. Signed-off-by: Song Shuai Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230529101524.322076-1-songshuaishuai@tinylab.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c8c22cf11602..e242bf63c0d2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -873,6 +873,9 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_HIBERNATION_HEADER def_bool HIBERNATION +config ARCH_SUSPEND_POSSIBLE + def_bool y + endmenu # "Power management options" menu "CPU Power Management" -- cgit v1.2.3-70-g09d2 From 85fadc0d04119c2fe4a20287767ab904c6d21ba1 Mon Sep 17 00:00:00 2001 From: Woody Zhang Date: Wed, 14 Jun 2023 21:19:07 +0800 Subject: riscv: move memblock_allow_resize() after linear mapping is ready The initial memblock metadata is accessed from kernel image mapping. The regions arrays need to "reallocated" from memblock and accessed through linear mapping to cover more memblock regions. So the resizing should not be allowed until linear mapping is ready. Note that there are memblock allocations when building linear mapping. This patch is similar to 24cc61d8cb5a ("arm64: memblock: don't permit memblock resizing until linear mapping is up"). In following log, many memblock regions are reserved before create_linear_mapping_page_table(). And then it triggered reallocation of memblock.reserved.regions and memcpy the old array in kernel image mapping to the new array in linear mapping which caused a page fault. [ 0.000000] memblock_reserve: [0x00000000bf01f000-0x00000000bf01ffff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf021000-0x00000000bf021fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf023000-0x00000000bf023fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf025000-0x00000000bf025fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf027000-0x00000000bf027fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf029000-0x00000000bf029fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf02b000-0x00000000bf02bfff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf02d000-0x00000000bf02dfff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf02f000-0x00000000bf02ffff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] memblock_reserve: [0x00000000bf030000-0x00000000bf030fff] early_init_fdt_scan_reserved_mem+0x28c/0x2c6 [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008007ffff (512 KiB) map non-reusable mmode_resv0@80000000 [ 0.000000] memblock_reserve: [0x00000000bf000000-0x00000000bf001fed] paging_init+0x19a/0x5ae [ 0.000000] memblock_phys_alloc_range: 4096 bytes align=0x1000 from=0x0000000000000000 max_addr=0x0000000000000000 alloc_pmd_fixmap+0x14/0x1c [ 0.000000] memblock_reserve: [0x000000017ffff000-0x000000017fffffff] memblock_alloc_range_nid+0xb8/0x128 [ 0.000000] memblock: reserved is doubled to 256 at [0x000000017fffd000-0x000000017fffe7ff] [ 0.000000] Unable to handle kernel paging request at virtual address ff600000ffffd000 [ 0.000000] Oops [#1] [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.4.0-rc1-00011-g99a670b2069c #66 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] epc : __memcpy+0x60/0xf8 [ 0.000000] ra : memblock_double_array+0x192/0x248 [ 0.000000] epc : ffffffff8081d214 ra : ffffffff80a3dfc0 sp : ffffffff81403bd0 [ 0.000000] gp : ffffffff814fbb38 tp : ffffffff8140dac0 t0 : 0000000001600000 [ 0.000000] t1 : 0000000000000000 t2 : 000000008f001000 s0 : ffffffff81403c60 [ 0.000000] s1 : ffffffff80c0bc98 a0 : ff600000ffffd000 a1 : ffffffff80c0bcd8 [ 0.000000] a2 : 0000000000000c00 a3 : ffffffff80c0c8d8 a4 : 0000000080000000 [ 0.000000] a5 : 0000000000080000 a6 : 0000000000000000 a7 : 0000000080200000 [ 0.000000] s2 : ff600000ffffd000 s3 : 0000000000002000 s4 : 0000000000000c00 [ 0.000000] s5 : ffffffff80c0bc60 s6 : ffffffff80c0bcc8 s7 : 0000000000000000 [ 0.000000] s8 : ffffffff814fd0a8 s9 : 000000017fffe7ff s10: 0000000000000000 [ 0.000000] s11: 0000000000001000 t3 : 0000000000001000 t4 : 0000000000000000 [ 0.000000] t5 : 000000008f003000 t6 : ff600000ffffd000 [ 0.000000] status: 0000000200000100 badaddr: ff600000ffffd000 cause: 000000000000000f [ 0.000000] [] __memcpy+0x60/0xf8 [ 0.000000] [] memblock_add_range.isra.14+0x12c/0x162 [ 0.000000] [] memblock_reserve+0x6e/0x8c [ 0.000000] [] memblock_alloc_range_nid+0xb8/0x128 [ 0.000000] [] memblock_phys_alloc_range+0x5e/0x6a [ 0.000000] [] alloc_pmd_fixmap+0x14/0x1c [ 0.000000] [] alloc_p4d_fixmap+0xc/0x14 [ 0.000000] [] create_pgd_mapping+0x98/0x17c [ 0.000000] [] create_linear_mapping_range.constprop.10+0xe4/0x112 [ 0.000000] [] paging_init+0x3ec/0x5ae [ 0.000000] [] setup_arch+0xb2/0x576 [ 0.000000] [] start_kernel+0x72/0x57e [ 0.000000] Code: b303 0285 b383 0305 be03 0385 be83 0405 bf03 0485 (b023) 00ef [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- Fixes: 671f9a3e2e24 ("RISC-V: Setup initial page tables in two stages") Signed-off-by: Woody Zhang Tested-by: Song Shuai Link: https://lore.kernel.org/r/tencent_FBB94CE615C5CCE7701CD39C15CCE0EE9706@qq.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 3b1e927a06b7..59df29fc635d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -267,7 +267,6 @@ static void __init setup_bootmem(void) dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); - memblock_allow_resize(); } #ifdef CONFIG_MMU @@ -1370,6 +1369,9 @@ void __init paging_init(void) { setup_bootmem(); setup_vm_final(); + + /* Depend on that Linear Mapping is ready */ + memblock_allow_resize(); } void __init misc_mem_init(void) -- cgit v1.2.3-70-g09d2 From 9657e9b7d2538dc73c24947aa00a8525dfb8062c Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 29 Jun 2023 16:22:28 +0200 Subject: riscv: Discard vector state on syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RISC-V vector specification states: Executing a system call causes all caller-saved vector registers (v0-v31, vl, vtype) and vstart to become unspecified. The vector registers are set to all 1s, vill is set (invalid), and the vector status is set to Dirty. That way we can prevent userspace from accidentally relying on the stated save. Rémi pointed out [1] that writing to the registers might be superfluous, and setting vill is sufficient. Link: https://lore.kernel.org/linux-riscv/12784326.9UPPK3MAeB@basile.remlab.net/ # [1] Suggested-by: Darius Rad Suggested-by: Palmer Dabbelt Suggested-by: Rémi Denis-Courmont Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20230629142228.1125715-1-bjorn@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vector.h | 34 ++++++++++++++++++++++++++++++++++ arch/riscv/kernel/traps.c | 2 ++ 2 files changed, 36 insertions(+) (limited to 'arch') diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 04c0b07bf6cd..3d78930cab51 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -33,6 +33,11 @@ static inline void __riscv_v_vstate_clean(struct pt_regs *regs) regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; } +static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) +{ + regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; +} + static inline void riscv_v_vstate_off(struct pt_regs *regs) { regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; @@ -128,6 +133,34 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ riscv_v_disable(); } +static inline void __riscv_v_vstate_discard(void) +{ + unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); + + riscv_v_enable(); + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vmv.v.i v0, -1\n\t" + "vmv.v.i v8, -1\n\t" + "vmv.v.i v16, -1\n\t" + "vmv.v.i v24, -1\n\t" + "vsetvl %0, x0, %1\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (vtype_inval) : "memory"); + riscv_v_disable(); +} + +static inline void riscv_v_vstate_discard(struct pt_regs *regs) +{ + if ((regs->status & SR_VS) == SR_VS_OFF) + return; + + __riscv_v_vstate_discard(); + __riscv_v_vstate_dirty(regs); +} + static inline void riscv_v_vstate_save(struct task_struct *task, struct pt_regs *regs) { @@ -173,6 +206,7 @@ static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return fals static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) +#define riscv_v_vstate_discard(regs) do {} while (0) #define riscv_v_vstate_save(task, regs) do {} while (0) #define riscv_v_vstate_restore(task, regs) do {} while (0) #define __switch_to_vector(__prev, __next) do {} while (0) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index bc02b282a403..f910dfccbf5d 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -302,6 +302,8 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + riscv_v_vstate_discard(regs); + syscall = syscall_enter_from_user_mode(regs, syscall); if (syscall < NR_syscalls) -- cgit v1.2.3-70-g09d2 From 52909f1768023656d5c429873e2246a134289a95 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 29 Jun 2023 12:33:34 +0100 Subject: RISC-V: drop error print from riscv_hartid_to_cpuid() As of commit 2ac874343749 ("RISC-V: split early & late of_node to hartid mapping") my CI complains about newly added pr_err() messages during boot, for example: [ 0.000000] Couldn't find cpu id for hartid [0] [ 0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller Before the split, riscv_of_processor_hartid() contained a check for whether the cpu was "available", before calling riscv_hartid_to_cpuid(), but after the split riscv_of_processor_hartid() can be called for cpus that are disabled. Most callers of riscv_hartid_to_cpuid() already report custom errors where it falls, making this print superfluous in those case. In other places, the print adds nothing - see riscv_intc_init() for example. Fixes: 2ac874343749 ("RISC-V: split early & late of_node to hartid mapping") Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230629-paternity-grafted-b901b76d04a0@wendy Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 23e533766a49..85bbce0f758c 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -58,7 +58,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid) if (cpuid_to_hartid_map(i) == hartid) return i; - pr_err("Couldn't find cpu id for hartid [%lu]\n", hartid); return -ENOENT; } -- cgit v1.2.3-70-g09d2 From 62ba41d2761206664a1fdc998051324457da2dd6 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 3 Jul 2023 12:00:44 -0700 Subject: mm: riscv: fix an unsafe pte read in huge_pte_alloc() The WARN_ON_ONCE() statement in riscv's huge_pte_alloc() is susceptible to false positives, because the pte is read twice at the C language level, locklessly, within the same conditional statement. Depending on compiler behavior, this can lead to generated machine code that actually reads the pte just once, or twice. Reading twice will expose the code to changing pte values and cause incorrect behavior. In [1], similar code actually caused a kernel crash on 64-bit x86, when using clang to build the kernel, but only after the conversion from *pte reads, to ptep_get(pte). The latter uses READ_ONCE(), which forced a double read of *pte. Rather than waiting for the upcoming ptep_get() conversion, just convert this part of the code now, but in a way that avoids the above problem: take a single snapshot of the pte before using it in the WARN conditional. As expected, this preparatory step does not actually change the generated code ("make mm/hugetlbpage.s"), on riscv64, when using a gcc 12.2 cross compiler. [1] https://lore.kernel.org/20230630013203.1955064-1-jhubbard@nvidia.com Suggested-by: James Houghton Cc: Ryan Roberts Signed-off-by: John Hubbard Reviewed-by: Andrew Jones Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20230703190044.311730-1-jhubbard@nvidia.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/hugetlbpage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 542883b3b49b..96225a8533ad 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -73,7 +73,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } out: - WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); + if (pte) { + pte_t pteval = ptep_get_lockless(pte); + + WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval)); + } return pte; } -- cgit v1.2.3-70-g09d2 From 6259f3443c6a376aa077816ac92e9ddeb0817d09 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 3 Jul 2023 19:31:26 +0100 Subject: risc-v: Fix order of IPI enablement vs RCU startup Conor reports that risc-v tries to enable IPIs before telling the core code to enable RCU. With the introduction of the mapple tree as a backing store for the irq descriptors, this results in a very shouty boot sequence, as RCU is legitimately upset. Restore some sanity by moving the risc_ipi_enable() call after notify_cpu_starting(), which explicitly enables RCU on the calling CPU. Fixes: 832f15f42646 ("RISC-V: Treat IPIs as normal Linux IRQs") Reported-by: Conor Dooley Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230703-dupe-frying-79ae2ccf94eb@spud Cc: Anup Patel Cc: Palmer Dabbelt Cc: Linus Torvalds Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20230703183126.1567625-1-maz@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smpboot.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index bb0b76e1a6d4..f4d6acb38dd0 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -238,10 +238,11 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; - riscv_ipi_enable(); - store_cpu_topology(curr_cpuid); notify_cpu_starting(curr_cpuid); + + riscv_ipi_enable(); + numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); probe_vendor_features(curr_cpuid); -- cgit v1.2.3-70-g09d2 From a2492ca86c98f676561f7d318b1e2e1786af0caf Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 23 Jun 2023 23:03:20 -0700 Subject: riscv: Select HAVE_ARCH_USERFAULTFD_MINOR This allocates the VM flag needed to support the userfaultfd minor fault functionality. Because the flag bit is >= bit 32, it can only be enabled for 64-bit kernels. See commit 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") for more information. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20230624060321.3401504-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e242bf63c0d2..e545713caf8f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -100,6 +100,7 @@ config RISCV select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER -- cgit v1.2.3-70-g09d2 From 31ca5d49264ba6197aa48a926f6a035ed08b3715 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Jun 2023 00:55:02 +0800 Subject: riscv: errata: thead: only set cbom size & noncoherent during boot The CBOM size and whether the HW is noncoherent is known and determined during booting and won't change after that. Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230614165504.532-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/thead/errata.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index c259dc925ec1..be84b14f0118 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -45,8 +45,11 @@ static bool errata_probe_cmo(unsigned int stage, if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return false; - riscv_cbom_block_size = L1_CACHE_BYTES; - riscv_noncoherent_supported(); + if (stage == RISCV_ALTERNATIVES_BOOT) { + riscv_cbom_block_size = L1_CACHE_BYTES; + riscv_noncoherent_supported(); + } + return true; } -- cgit v1.2.3-70-g09d2 From 3b472f860c5c73244a9b951c10c289cc9ee080f3 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Jun 2023 00:55:03 +0800 Subject: riscv: mm: mark CBO relate initialization funcs as __init The two functions cbo_get_block_size() and riscv_init_cbo_blocksizes() are only called during booting, mark them as __init. Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230614165504.532-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index fca532ddf3ec..fbc59b3f69f2 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -104,9 +104,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size); unsigned int riscv_cboz_block_size; EXPORT_SYMBOL_GPL(riscv_cboz_block_size); -static void cbo_get_block_size(struct device_node *node, - const char *name, u32 *block_size, - unsigned long *first_hartid) +static void __init cbo_get_block_size(struct device_node *node, + const char *name, u32 *block_size, + unsigned long *first_hartid) { unsigned long hartid; u32 val; @@ -126,7 +126,7 @@ static void cbo_get_block_size(struct device_node *node, } } -void riscv_init_cbo_blocksizes(void) +void __init riscv_init_cbo_blocksizes(void) { unsigned long cbom_hartid, cboz_hartid; u32 cbom_block_size = 0, cboz_block_size = 0; -- cgit v1.2.3-70-g09d2 From 8500808a991f0e569b3d835a6223848c0717a6c7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Jun 2023 00:55:04 +0800 Subject: riscv: mm: mark noncoherent_supported as __ro_after_init The noncoherent_supported indicates whether the HW is coherent or not, it won't change after booting, mark it as __ro_after_init. Signed-off-by: Jisheng Zhang Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230614165504.532-4-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/dma-noncoherent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index d919efab6eba..d51a75864e53 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -10,7 +10,7 @@ #include #include -static bool noncoherent_supported; +static bool noncoherent_supported __ro_after_init; void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) -- cgit v1.2.3-70-g09d2