author     Linus Torvalds <torvalds@linux-foundation.org>  2024-11-27 11:19:09 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-11-27 11:19:09 -0800
commit     91dbbe6c9ffe5eded9a3e75d773ff92da8d2bc57 (patch)
tree       30c98b08c3906a35d34327eff7725618c9045588 /arch/riscv
parent     c94696977527f69cbb41aa6a9af9d1991895d002 (diff)
parent     8d4f1e05ff821a5d59116ab8c3a30fcae81d8597 (diff)
Merge tag 'riscv-for-linus-6.13-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V updates from Palmer Dabbelt:

 - Support for pointer masking in userspace

 - Support for probing vector misaligned access performance

 - Support for qspinlock on systems with Zacas and Zabha

* tag 'riscv-for-linus-6.13-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (38 commits)
  RISC-V: Remove unnecessary include from compat.h
  riscv: Fix default misaligned access trap
  riscv: Add qspinlock support
  dt-bindings: riscv: Add Ziccrse ISA extension description
  riscv: Add ISA extension parsing for Ziccrse
  asm-generic: ticket-lock: Add separate ticket-lock.h
  asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock
  riscv: Implement xchg8/16() using Zabha
  riscv: Implement arch_cmpxchg128() using Zacas
  riscv: Improve zacas fully-ordered cmpxchg()
  riscv: Implement cmpxchg8/16() using Zabha
  dt-bindings: riscv: Add Zabha ISA extension description
  riscv: Implement cmpxchg32/64() using Zacas
  riscv: Do not fail to build on byte/halfword operations with Zawrs
  riscv: Move cpufeature.h macros into their own header
  KVM: riscv: selftests: Add Smnpm and Ssnpm to get-reg-list test
  RISC-V: KVM: Allow Smnpm and Ssnpm extensions for guests
  riscv: hwprobe: Export the Supm ISA extension
  riscv: selftests: Add a pointer masking test
  riscv: Allow ptrace control of the tagged address ABI
  ...
Diffstat (limited to 'arch/riscv')
-rw-r--r--  arch/riscv/Kconfig | 138
-rw-r--r--  arch/riscv/Makefile | 6
-rw-r--r--  arch/riscv/configs/defconfig | 1
-rw-r--r--  arch/riscv/include/asm/Kbuild | 4
-rw-r--r--  arch/riscv/include/asm/cmpxchg.h | 286
-rw-r--r--  arch/riscv/include/asm/compat.h | 1
-rw-r--r--  arch/riscv/include/asm/cpufeature-macros.h | 66
-rw-r--r--  arch/riscv/include/asm/cpufeature.h | 73
-rw-r--r--  arch/riscv/include/asm/csr.h | 16
-rw-r--r--  arch/riscv/include/asm/entry-common.h | 1
-rw-r--r--  arch/riscv/include/asm/hwcap.h | 7
-rw-r--r--  arch/riscv/include/asm/hwprobe.h | 2
-rw-r--r--  arch/riscv/include/asm/mmu.h | 7
-rw-r--r--  arch/riscv/include/asm/mmu_context.h | 13
-rw-r--r--  arch/riscv/include/asm/processor.h | 9
-rw-r--r--  arch/riscv/include/asm/spinlock.h | 47
-rw-r--r--  arch/riscv/include/asm/switch_to.h | 19
-rw-r--r--  arch/riscv/include/asm/uaccess.h | 43
-rw-r--r--  arch/riscv/include/asm/vector.h | 2
-rw-r--r--  arch/riscv/include/uapi/asm/hwprobe.h | 6
-rw-r--r--  arch/riscv/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/riscv/kernel/Makefile | 3
-rw-r--r--  arch/riscv/kernel/copy-unaligned.h | 5
-rw-r--r--  arch/riscv/kernel/cpufeature.c | 16
-rw-r--r--  arch/riscv/kernel/fpu.S | 4
-rw-r--r--  arch/riscv/kernel/process.c | 154
-rw-r--r--  arch/riscv/kernel/ptrace.c | 42
-rw-r--r--  arch/riscv/kernel/setup.c | 37
-rw-r--r--  arch/riscv/kernel/smpboot.c | 2
-rw-r--r--  arch/riscv/kernel/suspend.c | 4
-rw-r--r--  arch/riscv/kernel/sys_hwprobe.c | 44
-rw-r--r--  arch/riscv/kernel/traps_misaligned.c | 139
-rw-r--r--  arch/riscv/kernel/unaligned_access_speed.c | 156
-rw-r--r--  arch/riscv/kernel/vdso/Makefile | 9
-rw-r--r--  arch/riscv/kernel/vec-copy-unaligned.S | 58
-rw-r--r--  arch/riscv/kernel/vector.c | 2
-rw-r--r--  arch/riscv/kvm/vcpu_onereg.c | 4
37 files changed, 1247 insertions, 181 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index ff1e353b0d6f..cc63aef41e94 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -83,6 +83,7 @@ config RISCV
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
select CLINT_TIMER if RISCV_M_MODE
@@ -116,6 +117,7 @@ config RISCV
select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
select HARDIRQS_SW_RESEND
select HAS_IOPORT if MMU
+ select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
@@ -507,6 +509,39 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
+choice
+ prompt "RISC-V spinlock type"
+ default RISCV_COMBO_SPINLOCKS
+
+config RISCV_TICKET_SPINLOCKS
+ bool "Using ticket spinlock"
+
+config RISCV_QUEUED_SPINLOCKS
+ bool "Using queued spinlock"
+ depends on SMP && MMU && NONPORTABLE
+ select ARCH_USE_QUEUED_SPINLOCKS
+ help
+ The queued spinlock implementation requires the forward progress
+ guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
+ LR/SC with Ziccrse provide such guarantee.
+
+ Select this if and only if Zabha or Ziccrse is available on your
+ platform; RISCV_QUEUED_SPINLOCKS must not be selected for platforms
+ without one of those extensions.
+
+ If unsure, select RISCV_COMBO_SPINLOCKS, which will use qspinlocks
+ when supported and otherwise ticket spinlocks.
+
+config RISCV_COMBO_SPINLOCKS
+ bool "Using combo spinlock"
+ depends on SMP && MMU
+ select ARCH_USE_QUEUED_SPINLOCKS
+ help
+ Embed both queued spinlock and ticket lock so that the spinlock
+ implementation can be chosen at runtime.
+
+endchoice
+
config RISCV_ALTERNATIVE
bool
depends on !XIP_KERNEL
@@ -532,6 +567,17 @@ config RISCV_ISA_C
If you don't know what to do here, say Y.
+config RISCV_ISA_SUPM
+ bool "Supm extension for userspace pointer masking"
+ depends on 64BIT
+ default y
+ help
+ Add support for pointer masking in userspace (Supm) when the
+ underlying hardware extension (Smnpm or Ssnpm) is detected at boot.
+
+ If this option is disabled, userspace will be unable to use
+ the prctl(PR_{SET,GET}_TAGGED_ADDR_CTRL) API.
+
config RISCV_ISA_SVNAPOT
bool "Svnapot extension support for supervisor mode NAPOT pages"
depends on 64BIT && MMU
@@ -633,6 +679,40 @@ config RISCV_ISA_ZAWRS
use of these instructions in the kernel when the Zawrs extension is
detected at boot.
+config TOOLCHAIN_HAS_ZABHA
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZABHA
+ bool "Zabha extension support for atomic byte/halfword operations"
+ depends on TOOLCHAIN_HAS_ZABHA
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Enable the use of the Zabha ISA-extension to implement kernel
+ byte/halfword atomic memory operations when it is detected at boot.
+
+ If you don't know what to do here, say Y.
+
+config TOOLCHAIN_HAS_ZACAS
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZACAS
+ bool "Zacas extension support for atomic CAS"
+ depends on TOOLCHAIN_HAS_ZACAS
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Enable the use of the Zacas ISA-extension to implement kernel atomic
+ cmpxchg operations when it is detected at boot.
+
If you don't know what to do here, say Y.
config TOOLCHAIN_HAS_ZBB
@@ -786,10 +866,24 @@ config THREAD_SIZE_ORDER
config RISCV_MISALIGNED
bool
+ help
+ Embed support for detecting and emulating misaligned
+ scalar or vector loads and stores.
+
+config RISCV_SCALAR_MISALIGNED
+ bool
+ select RISCV_MISALIGNED
select SYSCTL_ARCH_UNALIGN_ALLOW
help
Embed support for emulating misaligned loads and stores.
+config RISCV_VECTOR_MISALIGNED
+ bool
+ select RISCV_MISALIGNED
+ depends on RISCV_ISA_V
+ help
+ Enable detecting support for vector misaligned loads and stores.
+
choice
prompt "Unaligned Accesses Support"
default RISCV_PROBE_UNALIGNED_ACCESS
@@ -801,7 +895,7 @@ choice
config RISCV_PROBE_UNALIGNED_ACCESS
bool "Probe for hardware unaligned access support"
- select RISCV_MISALIGNED
+ select RISCV_SCALAR_MISALIGNED
help
During boot, the kernel will run a series of tests to determine the
speed of unaligned accesses. This probing will dynamically determine
@@ -812,7 +906,7 @@ config RISCV_PROBE_UNALIGNED_ACCESS
config RISCV_EMULATED_UNALIGNED_ACCESS
bool "Emulate unaligned access where system support is missing"
- select RISCV_MISALIGNED
+ select RISCV_SCALAR_MISALIGNED
help
If unaligned memory accesses trap into the kernel as they are not
supported by the system, the kernel will emulate the unaligned
@@ -841,6 +935,46 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS
endchoice
+choice
+ prompt "Vector unaligned Accesses Support"
+ depends on RISCV_ISA_V
+ default RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+ help
+ This determines the level of support for vector unaligned accesses. This
+ information is used by the kernel to perform optimizations. It is also
+ exposed to user space via the hwprobe syscall. The hardware will be
+ probed at boot by default.
+
+config RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+ bool "Probe speed of vector unaligned accesses"
+ select RISCV_VECTOR_MISALIGNED
+ depends on RISCV_ISA_V
+ help
+ During boot, the kernel will run a series of tests to determine the
+ speed of vector unaligned accesses if they are supported. This probing
+ will dynamically determine the speed of vector unaligned accesses on
+ the underlying system if they are supported.
+
+config RISCV_SLOW_VECTOR_UNALIGNED_ACCESS
+ bool "Assume the system supports slow vector unaligned memory accesses"
+ depends on NONPORTABLE
+ help
+ Assume that the system supports slow vector unaligned memory accesses. The
+ kernel and userspace programs may not be able to run at all on systems
+ that do not support unaligned memory accesses.
+
+config RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS
+ bool "Assume the system supports fast vector unaligned memory accesses"
+ depends on NONPORTABLE
+ help
+ Assume that the system supports fast vector unaligned memory accesses. When
+ enabled, this option improves the performance of the kernel on such
+ systems. However, the kernel and userspace programs will run much more
+ slowly, or will not be able to run at all, on systems that do not
+ support efficient unaligned memory accesses.
+
+endchoice
+
source "arch/riscv/Kconfig.vendor"
endmenu # "Platform type"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index d469db9f46f4..9fe1ee740dda 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -82,6 +82,12 @@ else
riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei
endif
+# Check if the toolchain supports Zacas
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
+
+# Check if the toolchain supports Zabha
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 1d5e13b148f2..b4a37345703e 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -167,6 +167,7 @@ CONFIG_PINCTRL_SOPHGO_CV1800B=y
CONFIG_PINCTRL_SOPHGO_CV1812H=y
CONFIG_PINCTRL_SOPHGO_SG2000=y
CONFIG_PINCTRL_SOPHGO_SG2002=y
+CONFIG_GPIO_DWAPB=y
CONFIG_GPIO_SIFIVE=y
CONFIG_POWER_RESET_GPIO_RESTART=y
CONFIG_SENSORS_SFCTEMP=m
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 1461af12da6e..de13d5a234f8 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -6,10 +6,12 @@ generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
generic-y += mmzone.h
+generic-y += mcs_spinlock.h
generic-y += parport.h
-generic-y += spinlock.h
generic-y += spinlock_types.h
+generic-y += ticket_spinlock.h
generic-y += qrwlock.h
generic-y += qrwlock_types.h
+generic-y += qspinlock.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ebbce134917c..4cadc56220fe 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -12,30 +12,43 @@
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
-
-#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
-({ \
- u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
- ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
- ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
- << __s; \
- ulong __newx = (ulong)(n) << __s; \
- ulong __retx; \
- ulong __rc; \
- \
- __asm__ __volatile__ ( \
- prepend \
- "0: lr.w %0, %2\n" \
- " and %1, %0, %z4\n" \
- " or %1, %1, %z3\n" \
- " sc.w" sc_sfx " %1, %1, %2\n" \
- " bnez %1, 0b\n" \
- append \
- : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" (__newx), "rJ" (~__mask) \
- : "memory"); \
- \
- r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+#include <asm/cpufeature-macros.h>
+
+#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
+ swap_append, r, p, n) \
+({ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amoswap" swap_sfx " %0, %z2, %1\n" \
+ swap_append \
+ : "=&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+ << __s; \
+ ulong __newx = (ulong)(n) << __s; \
+ ulong __retx; \
+ ulong __rc; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ "0: lr.w %0, %2\n" \
+ " and %1, %0, %z4\n" \
+ " or %1, %1, %z3\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
+ " bnez %1, 0b\n" \
+ sc_append \
+ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+ : "rJ" (__newx), "rJ" (~__mask) \
+ : "memory"); \
+ \
+ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ } \
})
#define __arch_xchg(sfx, prepend, append, r, p, n) \
@@ -58,8 +71,13 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
+ prepend, sc_append, swap_append, \
+ __ret, __ptr, __new); \
+ break; \
case 2: \
- __arch_xchg_masked(sc_sfx, prepend, sc_append, \
+ __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
+ prepend, sc_append, swap_append, \
__ret, __ptr, __new); \
break; \
case 4: \
@@ -106,55 +124,90 @@
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
-
-#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
-({ \
- u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
- ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
- ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
- << __s; \
- ulong __newx = (ulong)(n) << __s; \
- ulong __oldx = (ulong)(o) << __s; \
- ulong __retx; \
- ulong __rc; \
- \
- __asm__ __volatile__ ( \
- prepend \
- "0: lr.w %0, %2\n" \
- " and %1, %0, %z5\n" \
- " bne %1, %z3, 1f\n" \
- " and %1, %0, %z6\n" \
- " or %1, %1, %z4\n" \
- " sc.w" sc_sfx " %1, %1, %2\n" \
- " bnez %1, 0b\n" \
- append \
- "1:\n" \
- : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" ((long)__oldx), "rJ" (__newx), \
- "rJ" (__mask), "rJ" (~__mask) \
- : "memory"); \
- \
- r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ r, p, o, n) \
+({ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
+ r = o; \
+ \
+ __asm__ __volatile__ ( \
+ cas_prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ cas_append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+ << __s; \
+ ulong __newx = (ulong)(n) << __s; \
+ ulong __oldx = (ulong)(o) << __s; \
+ ulong __retx; \
+ ulong __rc; \
+ \
+ __asm__ __volatile__ ( \
+ sc_prepend \
+ "0: lr.w %0, %2\n" \
+ " and %1, %0, %z5\n" \
+ " bne %1, %z3, 1f\n" \
+ " and %1, %0, %z6\n" \
+ " or %1, %1, %z4\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
+ " bnez %1, 0b\n" \
+ sc_append \
+ "1:\n" \
+ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+ : "rJ" ((long)__oldx), "rJ" (__newx), \
+ "rJ" (__mask), "rJ" (~__mask) \
+ : "memory"); \
+ \
+ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ } \
})
-#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
+#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ r, p, co, o, n) \
({ \
- register unsigned int __rc; \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
+ r = o; \
\
- __asm__ __volatile__ ( \
- prepend \
- "0: lr" lr_sfx " %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc" sc_sfx " %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- append \
- "1:\n" \
- : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
- : "rJ" (co o), "rJ" (n) \
- : "memory"); \
+ __asm__ __volatile__ ( \
+ cas_prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ cas_append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ register unsigned int __rc; \
+ \
+ __asm__ __volatile__ ( \
+ sc_prepend \
+ "0: lr" lr_sfx " %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc" sc_sfx " %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ sc_append \
+ "1:\n" \
+ : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
+ : "rJ" (co o), "rJ" (n) \
+ : "memory"); \
+ } \
})
-#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
+#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __old = (old); \
@@ -163,17 +216,28 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, __old, __new); \
+ break; \
case 2: \
- __arch_cmpxchg_masked(sc_sfx, prepend, append, \
- __ret, __ptr, __old, __new); \
+ __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, __old, __new); \
break; \
case 4: \
- __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
- __ret, __ptr, (long), __old, __new); \
+ __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, (long), __old, __new); \
break; \
case 8: \
- __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \
- __ret, __ptr, /**/, __old, __new); \
+ __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, /**/, __old, __new); \
break; \
default: \
BUILD_BUG(); \
@@ -181,17 +245,40 @@
(__typeof__(*(__ptr)))__ret; \
})
+/*
+ * These macros are here to improve the readability of the arch_cmpxchg_XXX()
+ * macros.
+ */
+#define SC_SFX(x) x
+#define CAS_SFX(x) x
+#define SC_PREPEND(x) x
+#define SC_APPEND(x) x
+#define CAS_PREPEND(x) x
+#define CAS_APPEND(x) x
+
#define arch_cmpxchg_relaxed(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", "", "")
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(""), SC_APPEND(""), \
+ CAS_PREPEND(""), CAS_APPEND(""))
#define arch_cmpxchg_acquire(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \
+ CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))
#define arch_cmpxchg_release(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \
+ CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))
#define arch_cmpxchg(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(".rl"), CAS_SFX(".aqrl"), \
+ SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
+ CAS_PREPEND(""), CAS_APPEND(""))
#define arch_cmpxchg_local(ptr, o, n) \
arch_cmpxchg_relaxed((ptr), (o), (n))
@@ -226,6 +313,44 @@
arch_cmpxchg_release((ptr), (o), (n)); \
})
+#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)
+
+#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
+
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __arch_cmpxchg128(p, o, n, cas_sfx) \
+({ \
+ __typeof__(*(p)) __o = (o); \
+ union __u128_halves __hn = { .full = (n) }; \
+ union __u128_halves __ho = { .full = (__o) }; \
+ register unsigned long t1 asm ("t1") = __hn.low; \
+ register unsigned long t2 asm ("t2") = __hn.high; \
+ register unsigned long t3 asm ("t3") = __ho.low; \
+ register unsigned long t4 asm ("t4") = __ho.high; \
+ \
+ __asm__ __volatile__ ( \
+ " amocas.q" cas_sfx " %0, %z3, %2" \
+ : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
+ : "rJ" (t1), "rJ" (t2) \
+ : "memory"); \
+ \
+ ((u128)t4 << 64) | t3; \
+})
+
+#define arch_cmpxchg128(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), ".aqrl")
+
+#define arch_cmpxchg128_local(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), "")
+
+#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
+
#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
* Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
@@ -245,6 +370,11 @@ static __always_inline void __cmpwait(volatile void *ptr,
: : : : no_zawrs);
switch (size) {
+ case 1:
+ fallthrough;
+ case 2:
+ /* RISC-V doesn't have lr instructions on byte and half-word. */
+ goto no_zawrs;
case 4:
asm volatile(
" lr.w %0, %1\n"
diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h
index aa103530a5c8..6081327e55f5 100644
--- a/arch/riscv/include/asm/compat.h
+++ b/arch/riscv/include/asm/compat.h
@@ -9,7 +9,6 @@
*/
#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
#include <asm-generic/compat.h>
static inline int is_compat_task(void)
diff --git a/arch/riscv/include/asm/cpufeature-macros.h b/arch/riscv/include/asm/cpufeature-macros.h
new file mode 100644
index 000000000000..a8103edbf51f
--- /dev/null
+++ b/arch/riscv/include/asm/cpufeature-macros.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2022-2024 Rivos, Inc
+ */
+
+#ifndef _ASM_CPUFEATURE_MACROS_H
+#define _ASM_CPUFEATURE_MACROS_H
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+
+#define STANDARD_EXT 0
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
+static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor,
+ const unsigned long ext)
+{
+ asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1)
+ :
+ : [vendor] "i" (vendor), [ext] "i" (ext)
+ :
+ : l_no);
+
+ return true;
+l_no:
+ return false;
+}
+
+static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor,
+ const unsigned long ext)
+{
+ asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1)
+ :
+ : [vendor] "i" (vendor), [ext] "i" (ext)
+ :
+ : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext)
+{
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_unlikely(STANDARD_EXT, ext);
+
+ return __riscv_isa_extension_available(NULL, ext);
+}
+
+static __always_inline bool riscv_has_extension_likely(const unsigned long ext)
+{
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_likely(STANDARD_EXT, ext);
+
+ return __riscv_isa_extension_available(NULL, ext);
+}
+
+#endif /* _ASM_CPUFEATURE_MACROS_H */
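The split into cpufeature-macros.h exists so that low-level headers such as cmpxchg.h can use the extension checks without pulling in all of cpufeature.h. A minimal sketch of the intended call pattern follows; want_zacas_fast_path() is a hypothetical helper, not part of the patch. The IS_ENABLED() test removes the check entirely when the config option is off, and the alternative-patched riscv_has_extension_unlikely() reduces to a nop or a jump at runtime.

/* Hypothetical helper showing the usage pattern mirrored by cmpxchg.h above. */
static __always_inline bool want_zacas_fast_path(void)
{
	return IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
	       riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS);
}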
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 45f9c1171a48..4bd054c54c21 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -8,9 +8,12 @@
#include <linux/bitmap.h>
#include <linux/jump_label.h>
+#include <linux/workqueue.h>
+#include <linux/kconfig.h>
+#include <linux/percpu-defs.h>
+#include <linux/threads.h>
#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <asm/errno.h>
+#include <asm/cpufeature-macros.h>
/*
* These are probed via a device_initcall(), via either the SBI or directly
@@ -31,7 +34,7 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
/* Per-cpu ISA extensions. */
extern struct riscv_isainfo hart_isa[NR_CPUS];
-void riscv_user_isa_enable(void);
+void __init riscv_user_isa_enable(void);
#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \
.name = #_name, \
@@ -58,8 +61,9 @@ void riscv_user_isa_enable(void);
#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
_RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
-#if defined(CONFIG_RISCV_MISALIGNED)
bool check_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
+void check_unaligned_access_emulated(struct work_struct *work __always_unused);
void unaligned_emulation_finish(void);
bool unaligned_ctl_available(void);
DECLARE_PER_CPU(long, misaligned_access_speed);
@@ -70,6 +74,12 @@ static inline bool unaligned_ctl_available(void)
}
#endif
+bool check_vector_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
+DECLARE_PER_CPU(long, vector_misaligned_access);
+#endif
+
#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
@@ -103,61 +113,6 @@ extern const size_t riscv_isa_ext_count;
extern bool riscv_isa_fallback;
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
-
-#define STANDARD_EXT 0
-
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
-#define riscv_isa_extension_available(isa_bitmap, ext) \
- __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
-
-static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor,
- const unsigned long ext)
-{
- asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1)
- :
- : [vendor] "i" (vendor), [ext] "i" (ext)
- :
- : l_no);
-
- return true;
-l_no:
- return false;
-}
-
-static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor,
- const unsigned long ext)
-{
- asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1)
- :
- : [vendor] "i" (vendor), [ext] "i" (ext)
- :
- : l_yes);
-
- return false;
-l_yes:
- return true;
-}
-
-static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext)
-{
- compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
-
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
- return __riscv_has_extension_unlikely(STANDARD_EXT, ext);
-
- return __riscv_isa_extension_available(NULL, ext);
-}
-
-static __always_inline bool riscv_has_extension_likely(const unsigned long ext)
-{
- compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
-
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
- return __riscv_has_extension_likely(STANDARD_EXT, ext);
-
- return __riscv_isa_extension_available(NULL, ext);
-}
-
static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
{
compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 25966995da04..fe5d4eb9adea 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -119,6 +119,10 @@
/* HSTATUS flags */
#ifdef CONFIG_64BIT
+#define HSTATUS_HUPMM _AC(0x3000000000000, UL)
+#define HSTATUS_HUPMM_PMLEN_0 _AC(0x0000000000000, UL)
+#define HSTATUS_HUPMM_PMLEN_7 _AC(0x2000000000000, UL)
+#define HSTATUS_HUPMM_PMLEN_16 _AC(0x3000000000000, UL)
#define HSTATUS_VSXL _AC(0x300000000, UL)
#define HSTATUS_VSXL_SHIFT 32
#endif
@@ -195,6 +199,10 @@
/* xENVCFG flags */
#define ENVCFG_STCE (_AC(1, ULL) << 63)
#define ENVCFG_PBMTE (_AC(1, ULL) << 62)
+#define ENVCFG_PMM (_AC(0x3, ULL) << 32)
+#define ENVCFG_PMM_PMLEN_0 (_AC(0x0, ULL) << 32)
+#define ENVCFG_PMM_PMLEN_7 (_AC(0x2, ULL) << 32)
+#define ENVCFG_PMM_PMLEN_16 (_AC(0x3, ULL) << 32)
#define ENVCFG_CBZE (_AC(1, UL) << 7)
#define ENVCFG_CBCFE (_AC(1, UL) << 6)
#define ENVCFG_CBIE_SHIFT 4
@@ -216,6 +224,12 @@
#define SMSTATEEN0_SSTATEEN0_SHIFT 63
#define SMSTATEEN0_SSTATEEN0 (_ULL(1) << SMSTATEEN0_SSTATEEN0_SHIFT)
+/* mseccfg bits */
+#define MSECCFG_PMM ENVCFG_PMM
+#define MSECCFG_PMM_PMLEN_0 ENVCFG_PMM_PMLEN_0
+#define MSECCFG_PMM_PMLEN_7 ENVCFG_PMM_PMLEN_7
+#define MSECCFG_PMM_PMLEN_16 ENVCFG_PMM_PMLEN_16
+
/* symbolic CSR names: */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
@@ -382,6 +396,8 @@
#define CSR_MIP 0x344
#define CSR_PMPCFG0 0x3a0
#define CSR_PMPADDR0 0x3b0
+#define CSR_MSECCFG 0x747
+#define CSR_MSECCFGH 0x757
#define CSR_MVENDORID 0xf11
#define CSR_MARCHID 0xf12
#define CSR_MIMPID 0xf13
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h
index 2293e535f865..b28ccc6cdeea 100644
--- a/arch/riscv/include/asm/entry-common.h
+++ b/arch/riscv/include/asm/entry-common.h
@@ -33,6 +33,7 @@ static inline int handle_misaligned_load(struct pt_regs *regs)
{
return -1;
}
+
static inline int handle_misaligned_store(struct pt_regs *regs)
{
return -1;
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 46d9de54179e..08d2a5697466 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -93,6 +93,11 @@
#define RISCV_ISA_EXT_ZCMOP 84
#define RISCV_ISA_EXT_ZAWRS 85
#define RISCV_ISA_EXT_SVVPTC 86
+#define RISCV_ISA_EXT_SMMPM 87
+#define RISCV_ISA_EXT_SMNPM 88
+#define RISCV_ISA_EXT_SSNPM 89
+#define RISCV_ISA_EXT_ZABHA 90
+#define RISCV_ISA_EXT_ZICCRSE 91
#define RISCV_ISA_EXT_XLINUXENVCFG 127
@@ -101,8 +106,10 @@
#ifdef CONFIG_RISCV_M_MODE
#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA
+#define RISCV_ISA_EXT_SUPM RISCV_ISA_EXT_SMNPM
#else
#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA
+#define RISCV_ISA_EXT_SUPM RISCV_ISA_EXT_SSNPM
#endif
#endif /* _ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index ffb9484531af..1ce1df6d0ff3 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 9
+#define RISCV_HWPROBE_MAX_KEY 10
static inline bool riscv_hwprobe_key_is_valid(__s64 key)
{
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index c9e03e9da3dc..1cc90465d75b 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -26,8 +26,15 @@ typedef struct {
unsigned long exec_fdpic_loadmap;
unsigned long interp_fdpic_loadmap;
#endif
+ unsigned long flags;
+#ifdef CONFIG_RISCV_ISA_SUPM
+ u8 pmlen;
+#endif
} mm_context_t;
+/* Lock the pointer masking mode because this mm is multithreaded */
+#define MM_CONTEXT_LOCK_PMLEN 0
+
#define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK)
#define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK)
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index 7030837adc1a..8c4bc49a3a0f 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -20,6 +20,9 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
+#ifdef CONFIG_RISCV_ISA_SUPM
+ next->context.pmlen = 0;
+#endif
switch_mm(prev, next, NULL);
}
@@ -30,11 +33,21 @@ static inline int init_new_context(struct task_struct *tsk,
#ifdef CONFIG_MMU
atomic_long_set(&mm->context.id, 0);
#endif
+ if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM))
+ clear_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags);
return 0;
}
DECLARE_STATIC_KEY_FALSE(use_asid_allocator);
+#ifdef CONFIG_RISCV_ISA_SUPM
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return -1UL >> mm->context.pmlen;
+}
+#endif
+
#include <asm-generic/mmu_context.h>
#endif /* _ASM_RISCV_MMU_CONTEXT_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index efa1b3519b23..5f56eb9d114a 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -102,6 +102,7 @@ struct thread_struct {
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
+ unsigned long envcfg;
u32 riscv_v_flags;
u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
@@ -177,6 +178,14 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
#define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2)
extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread);
+#ifdef CONFIG_RISCV_ISA_SUPM
+/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
+long get_tagged_addr_ctrl(struct task_struct *task);
+#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg)
+#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current)
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PROCESSOR_H */
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
new file mode 100644
index 000000000000..e5121b89acea
--- /dev/null
+++ b/arch/riscv/include/asm/spinlock.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RISCV_SPINLOCK_H
+#define __ASM_RISCV_SPINLOCK_H
+
+#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
+#define _Q_PENDING_LOOPS (1 << 9)
+
+#define __no_arch_spinlock_redefine
+#include <asm/ticket_spinlock.h>
+#include <asm/qspinlock.h>
+#include <asm/jump_label.h>
+
+/*
+ * TODO: Use an alternative instead of a static key when we are able to parse
+ * the extensions string earlier in the boot process.
+ */
+DECLARE_STATIC_KEY_TRUE(qspinlock_key);
+
+#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
+static __always_inline type arch_spin_##op(type_lock lock) \
+{ \
+ if (static_branch_unlikely(&qspinlock_key)) \
+ return queued_spin_##op(lock); \
+ return ticket_spin_##op(lock); \
+}
+
+SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
+
+#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
+
+#include <asm/qspinlock.h>
+
+#else
+
+#include <asm/ticket_spinlock.h>
+
+#endif
+
+#include <asm/qrwlock.h>
+
+#endif /* __ASM_RISCV_SPINLOCK_H */
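To make the combo-spinlock dispatch concrete, this is roughly what SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *) above expands to; the other five operations follow the same shape, so every arch_spin_*() call becomes a static-key branch between the queued and ticket implementations.

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	if (static_branch_unlikely(&qspinlock_key))
		return queued_spin_lock(lock);
	return ticket_spin_lock(lock);
}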
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 7594df37cc9f..94e33216b2d9 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -70,6 +70,24 @@ static __always_inline bool has_fpu(void) { return false; }
#define __switch_to_fpu(__prev, __next) do { } while (0)
#endif
+static inline void envcfg_update_bits(struct task_struct *task,
+ unsigned long mask, unsigned long val)
+{
+ unsigned long envcfg;
+
+ envcfg = (task->thread.envcfg & ~mask) | val;
+ task->thread.envcfg = envcfg;
+ if (task == current)
+ csr_write(CSR_ENVCFG, envcfg);
+}
+
+static inline void __switch_to_envcfg(struct task_struct *next)
+{
+ asm volatile (ALTERNATIVE("nop", "csrw " __stringify(CSR_ENVCFG) ", %0",
+ 0, RISCV_ISA_EXT_XLINUXENVCFG, 1)
+ :: "r" (next->thread.envcfg) : "memory");
+}
+
extern struct task_struct *__switch_to(struct task_struct *,
struct task_struct *);
@@ -103,6 +121,7 @@ do { \
__switch_to_vector(__prev, __next); \
if (switch_to_should_flush_icache(__next)) \
local_flush_icache_all(); \
+ __switch_to_envcfg(__next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 72ec1d9bd3f3..fee56b0c8058 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -9,8 +9,41 @@
#define _ASM_RISCV_UACCESS_H
#include <asm/asm-extable.h>
+#include <asm/cpufeature.h>
#include <asm/pgtable.h> /* for TASK_SIZE */
+#ifdef CONFIG_RISCV_ISA_SUPM
+static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigned long addr)
+{
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) {
+ u8 pmlen = mm->context.pmlen;
+
+ /* Virtual addresses are sign-extended; physical addresses are zero-extended. */
+ if (IS_ENABLED(CONFIG_MMU))
+ return (long)(addr << pmlen) >> pmlen;
+ else
+ return (addr << pmlen) >> pmlen;
+ }
+
+ return addr;
+}
+
+#define untagged_addr(addr) ({ \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ (__force __typeof__(addr))__untagged_addr_remote(current->mm, __addr); \
+})
+
+#define untagged_addr_remote(mm, addr) ({ \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ mmap_assert_locked(mm); \
+ (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
+})
+
+#define access_ok(addr, size) likely(__access_ok(untagged_addr(addr), size))
+#else
+#define untagged_addr(addr) (addr)
+#endif
+
/*
* User space memory access functions
*/
@@ -130,7 +163,7 @@ do { \
*/
#define __get_user(x, ptr) \
({ \
- const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
+ const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \
long __gu_err = 0; \
\
__chk_user_ptr(__gu_ptr); \
@@ -246,7 +279,7 @@ do { \
*/
#define __put_user(x, ptr) \
({ \
- __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \
__typeof__(*__gu_ptr) __val = (x); \
long __pu_err = 0; \
\
@@ -293,13 +326,13 @@ unsigned long __must_check __asm_copy_from_user(void *to,
static inline unsigned long
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- return __asm_copy_from_user(to, from, n);
+ return __asm_copy_from_user(to, untagged_addr(from), n);
}
static inline unsigned long
raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- return __asm_copy_to_user(to, from, n);
+ return __asm_copy_to_user(untagged_addr(to), from, n);
}
extern long strncpy_from_user(char *dest, const char __user *src, long count);
@@ -314,7 +347,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
{
might_fault();
return access_ok(to, n) ?
- __clear_user(to, n) : n;
+ __clear_user(untagged_addr(to), n) : n;
}
#define __get_kernel_nofault(dst, src, type, err_label) \
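As a worked example of __untagged_addr_remote() above, assume Supm with PMLEN = 16 on an MMU kernel (the sign-extending case); the untag_pmlen16() helper below is illustrative only, not part of the patch. The top 16 tag bits are shifted out and the top untagged address bit is sign-extended back in, so a tagged canonical user pointer becomes a canonical user pointer again.

/* Illustrative only: untagging with a fixed PMLEN of 16. */
static unsigned long untag_pmlen16(unsigned long tagged)
{
	/* e.g. tagged  = 0x00ab3fffffff1000 (tag 0x00ab in bits 63..48)
	 *      returns   0x00003fffffff1000 (tag stripped, bit 47 sign-extended)
	 */
	return (unsigned long)((long)(tagged << 16) >> 16);
}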
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index be7d309cca8a..c7c023afbacd 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -21,6 +21,7 @@
extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
+bool insn_is_vector(u32 insn_buf);
bool riscv_v_first_use_handler(struct pt_regs *regs);
void kernel_vector_begin(void);
void kernel_vector_end(void);
@@ -268,6 +269,7 @@ struct pt_regs;
static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
static __always_inline bool has_vector(void) { return false; }
+static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 1e153cda57db..3af142b99f77 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -72,6 +72,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_EXT_ZCF (1ULL << 46)
#define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47)
#define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48)
+#define RISCV_HWPROBE_EXT_SUPM (1ULL << 49)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
@@ -88,6 +89,11 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2
#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3
#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4
+#define RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF 10
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN 0
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3
+#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
/* Flags */
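Userspace discovers the new key through the riscv_hwprobe() syscall. The sketch below is not part of the patch: it queries RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF with the raw syscall and an empty CPU set (meaning all online CPUs), assumes uapi headers that already carry this series, and keeps error handling minimal.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
	};

	/* riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags) */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
		return 1;

	printf("vector misaligned perf: %lld\n", (long long)pair.value);
	return 0;
}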
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index e97db3296456..4f24201376b1 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -175,6 +175,8 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZCF,
KVM_RISCV_ISA_EXT_ZCMOP,
KVM_RISCV_ISA_EXT_ZAWRS,
+ KVM_RISCV_ISA_EXT_SMNPM,
+ KVM_RISCV_ISA_EXT_SSNPM,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 69dc8aaab3fb..063d1faf5a53 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -75,7 +75,8 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
-obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o
+obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o
+obj-$(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS) += vec-copy-unaligned.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_FPU) += kernel_mode_fpu.o
diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h
index e3d70d35b708..85d4d11450cb 100644
--- a/arch/riscv/kernel/copy-unaligned.h
+++ b/arch/riscv/kernel/copy-unaligned.h
@@ -10,4 +10,9 @@
void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size);
void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size);
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size);
+void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size);
+#endif
+
#endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 826f46b21f2e..467c5c735bf5 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -29,6 +29,8 @@
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
+static bool any_cpu_has_zicboz;
+
unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
@@ -99,6 +101,7 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n");
return -EINVAL;
}
+ any_cpu_has_zicboz = true;
return 0;
}
@@ -315,6 +318,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
riscv_ext_zicbom_validate),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts,
riscv_ext_zicboz_validate),
+ __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
@@ -323,6 +327,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
__RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP),
+ __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
__RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
__RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
__RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
@@ -375,9 +380,12 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts),
__RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT),
__RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
+ __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM),
+ __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN),
__RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
+ __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
@@ -918,10 +926,12 @@ unsigned long riscv_get_elf_hwcap(void)
return hwcap;
}
-void riscv_user_isa_enable(void)
+void __init riscv_user_isa_enable(void)
{
- if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
- csr_set(CSR_ENVCFG, ENVCFG_CBZE);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ))
+ current->thread.envcfg |= ENVCFG_CBZE;
+ else if (any_cpu_has_zicboz)
+ pr_warn("Zicboz disabled as it is unavailable on some harts\n");
}
#ifdef CONFIG_RISCV_ALTERNATIVE
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
index 327cf527dd7e..f74f6b60e347 100644
--- a/arch/riscv/kernel/fpu.S
+++ b/arch/riscv/kernel/fpu.S
@@ -170,7 +170,7 @@ SYM_FUNC_END(__fstate_restore)
__access_func(f31)
-#ifdef CONFIG_RISCV_MISALIGNED
+#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
/*
* Disable compressed instructions set to keep a constant offset between FP
@@ -224,4 +224,4 @@ SYM_FUNC_START(get_f64_reg)
fp_access_epilogue
SYM_FUNC_END(get_f64_reg)
-#endif /* CONFIG_RISCV_MISALIGNED */
+#endif /* CONFIG_RISCV_SCALAR_MISALIGNED */
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index e3142d8a6e28..58b6482c2bf6 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -7,6 +7,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/bitfield.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -180,6 +181,10 @@ void flush_thread(void)
memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM))
+ envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0);
+#endif
}
void arch_release_task_struct(struct task_struct *tsk)
@@ -208,6 +213,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
+ /* Ensure all threads in this mm have the same pointer masking mode. */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM) && p->mm && (clone_flags & CLONE_VM))
+ set_bit(MM_CONTEXT_LOCK_PMLEN, &p->mm->context.flags);
+
memset(&p->thread.s, 0, sizeof(p->thread.s));
/* p->thread holds context to be restored by __switch_to() */
@@ -242,3 +251,148 @@ void __init arch_task_cache_init(void)
{
riscv_v_setup_ctx_cache();
}
+
+#ifdef CONFIG_RISCV_ISA_SUPM
+enum {
+ PMLEN_0 = 0,
+ PMLEN_7 = 7,
+ PMLEN_16 = 16,
+};
+
+static bool have_user_pmlen_7;
+static bool have_user_pmlen_16;
+
+/*
+ * Control the relaxed ABI allowing tagged user addresses into the kernel.
+ */
+static unsigned int tagged_addr_disabled;
+
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
+{
+ unsigned long valid_mask = PR_PMLEN_MASK | PR_TAGGED_ADDR_ENABLE;
+ struct thread_info *ti = task_thread_info(task);
+ struct mm_struct *mm = task->mm;
+ unsigned long pmm;
+ u8 pmlen;
+
+ if (is_compat_thread(ti))
+ return -EINVAL;
+
+ if (arg & ~valid_mask)
+ return -EINVAL;
+
+ /*
+ * Prefer the smallest PMLEN that satisfies the user's request,
+ * in case choosing a larger PMLEN has a performance impact.
+ */
+ pmlen = FIELD_GET(PR_PMLEN_MASK, arg);
+ if (pmlen == PMLEN_0) {
+ pmm = ENVCFG_PMM_PMLEN_0;
+ } else if (pmlen <= PMLEN_7 && have_user_pmlen_7) {
+ pmlen = PMLEN_7;
+ pmm = ENVCFG_PMM_PMLEN_7;
+ } else if (pmlen <= PMLEN_16 && have_user_pmlen_16) {
+ pmlen = PMLEN_16;
+ pmm = ENVCFG_PMM_PMLEN_16;
+ } else {
+ return -EINVAL;
+ }
+
+ /*
+ * Do not allow the enabling of the tagged address ABI if globally
+ * disabled via sysctl abi.tagged_addr_disabled, or if pointer masking
+ * is disabled for userspace.
+ */
+ if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen))
+ return -EINVAL;
+
+ if (!(arg & PR_TAGGED_ADDR_ENABLE))
+ pmlen = PMLEN_0;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ if (test_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags) && mm->context.pmlen != pmlen) {
+ mmap_write_unlock(mm);
+ return -EBUSY;
+ }
+
+ envcfg_update_bits(task, ENVCFG_PMM, pmm);
+ mm->context.pmlen = pmlen;
+
+ mmap_write_unlock(mm);
+
+ return 0;
+}
+
+long get_tagged_addr_ctrl(struct task_struct *task)
+{
+ struct thread_info *ti = task_thread_info(task);
+ long ret = 0;
+
+ if (is_compat_thread(ti))
+ return -EINVAL;
+
+ /*
+ * The mm context's pmlen is set only when the tagged address ABI is
+ * enabled, so the effective PMLEN must be extracted from envcfg.PMM.
+ */
+ switch (task->thread.envcfg & ENVCFG_PMM) {
+ case ENVCFG_PMM_PMLEN_7:
+ ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7);
+ break;
+ case ENVCFG_PMM_PMLEN_16:
+ ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16);
+ break;
+ }
+
+ if (task->mm->context.pmlen)
+ ret |= PR_TAGGED_ADDR_ENABLE;
+
+ return ret;
+}
+
+static bool try_to_set_pmm(unsigned long value)
+{
+ csr_set(CSR_ENVCFG, value);
+ return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value;
+}
+
+/*
+ * Global sysctl to disable the tagged user addresses support. This control
+ * only prevents the tagged address ABI enabling via prctl() and does not
+ * disable it for tasks that already opted in to the relaxed ABI.
+ */
+
+static struct ctl_table tagged_addr_sysctl_table[] = {
+ {
+ .procname = "tagged_addr_disabled",
+ .mode = 0644,
+ .data = &tagged_addr_disabled,
+ .maxlen = sizeof(int),
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+};
+
+static int __init tagged_addr_init(void)
+{
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM))
+ return 0;
+
+ /*
+ * envcfg.PMM is a WARL field. Detect which values are supported.
+ * Assume the supported PMLEN values are the same on all harts.
+ */
+ csr_clear(CSR_ENVCFG, ENVCFG_PMM);
+ have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7);
+ have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16);
+
+ if (!register_sysctl("abi", tagged_addr_sysctl_table))
+ return -EINVAL;
+
+ return 0;
+}
+core_initcall(tagged_addr_init);
+#endif /* CONFIG_RISCV_ISA_SUPM */
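From userspace, the new prctl() interface enables the tagged address ABI and requests a minimum number of tag bits. A minimal sketch follows (not part of the patch); it assumes the PR_TAGGED_ADDR_ENABLE and PR_PMLEN_SHIFT/PR_PMLEN_MASK definitions that this series adds to <linux/prctl.h>, and relies on the kernel rounding the request up to a supported PMLEN (7 or 16), as implemented in set_tagged_addr_ctrl() above.

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* Request the tagged address ABI with at least 7 tag bits. */
	unsigned long arg = PR_TAGGED_ADDR_ENABLE | (7UL << PR_PMLEN_SHIFT);

	if (prctl(PR_SET_TAGGED_ADDR_CTRL, arg, 0, 0, 0) != 0) {
		perror("PR_SET_TAGGED_ADDR_CTRL");
		return 1;
	}

	/* Read back the effective setting; PMLEN may have been rounded up. */
	printf("tagged addr ctrl: 0x%x\n",
	       (unsigned int)prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0));
	return 0;
}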
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 92731ff8c79a..ea67e9fb7a58 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -28,6 +28,9 @@ enum riscv_regset {
#ifdef CONFIG_RISCV_ISA_V
REGSET_V,
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+ REGSET_TAGGED_ADDR_CTRL,
+#endif
};
static int riscv_gpr_get(struct task_struct *target,
@@ -152,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target,
}
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+static int tagged_addr_ctrl_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ long ctrl = get_tagged_addr_ctrl(target);
+
+ if (IS_ERR_VALUE(ctrl))
+ return ctrl;
+
+ return membuf_write(&to, &ctrl, sizeof(ctrl));
+}
+
+static int tagged_addr_ctrl_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ return set_tagged_addr_ctrl(target, ctrl);
+}
+#endif
+
static const struct user_regset riscv_user_regset[] = {
[REGSET_X] = {
.core_note_type = NT_PRSTATUS,
@@ -182,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = {
.set = riscv_vr_set,
},
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+ [REGSET_TAGGED_ADDR_CTRL] = {
+ .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = tagged_addr_ctrl_get,
+ .set = tagged_addr_ctrl_set,
+ },
+#endif
};
static const struct user_regset_view riscv_user_native_view = {
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 26c886db4fb3..016b48fcd6f2 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -244,6 +244,42 @@ static void __init parse_dtb(void)
#endif
}
+#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
+DEFINE_STATIC_KEY_TRUE(qspinlock_key);
+EXPORT_SYMBOL(qspinlock_key);
+#endif
+
+static void __init riscv_spinlock_init(void)
+{
+ char *using_ext = NULL;
+
+ if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
+ pr_info("Ticket spinlock: enabled\n");
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
+ IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
+ riscv_isa_extension_available(NULL, ZABHA) &&
+ riscv_isa_extension_available(NULL, ZACAS)) {
+ using_ext = "using Zabha";
+ } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
+ using_ext = "using Ziccrse";
+ }
+#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
+ else {
+ static_branch_disable(&qspinlock_key);
+ pr_info("Ticket spinlock: enabled\n");
+ return;
+ }
+#endif
+
+ if (!using_ext)
+ pr_err("Queued spinlock without Zabha or Ziccrse");
+ else
+ pr_info("Queued spinlock %s: enabled\n", using_ext);
+}
+
extern void __init init_rt_signal_env(void);
void __init setup_arch(char **cmdline_p)
@@ -297,6 +333,7 @@ void __init setup_arch(char **cmdline_p)
riscv_set_dma_cache_alignment();
riscv_user_isa_enable();
+ riscv_spinlock_init();
}
bool arch_cpu_is_hotpluggable(int cpu)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 0f8f1c95ac38..e36d20205bd7 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -233,8 +233,6 @@ asmlinkage __visible void smp_callin(void)
numa_add_cpu(curr_cpuid);
set_cpu_online(curr_cpuid, true);
- riscv_user_isa_enable();
-
/*
* Remote cache and TLB flushes are ignored while the CPU is offline,
* so flush them both right now just in case.
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index c8cec0cc5833..9a8a0dc035b2 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -14,7 +14,7 @@
void suspend_save_csrs(struct suspend_context *context)
{
- if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG))
context->envcfg = csr_read(CSR_ENVCFG);
context->tvec = csr_read(CSR_TVEC);
context->ie = csr_read(CSR_IE);
@@ -37,7 +37,7 @@ void suspend_save_csrs(struct suspend_context *context)
void suspend_restore_csrs(struct suspend_context *context)
{
csr_write(CSR_SCRATCH, 0);
- if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG))
csr_write(CSR_ENVCFG, context->envcfg);
csr_write(CSR_TVEC, context->tvec);
csr_write(CSR_IE, context->ie);
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 711a31f27c3d..cb93adfffc48 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -150,6 +150,9 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
EXT_KEY(ZFH);
EXT_KEY(ZFHMIN);
}
+
+ if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM))
+ EXT_KEY(SUPM);
#undef EXT_KEY
}
@@ -201,6 +204,43 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
}
#endif
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+ int cpu;
+ u64 perf = -1ULL;
+
+ /* Return if supported or not even if speed wasn't probed */
+ for_each_cpu(cpu, cpus) {
+ int this_perf = per_cpu(vector_misaligned_access, cpu);
+
+ if (perf == -1ULL)
+ perf = this_perf;
+
+ if (perf != this_perf) {
+ perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+ break;
+ }
+ }
+
+ if (perf == -1ULL)
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+ return perf;
+}
+#else
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+ if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS))
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
+
+ if (IS_ENABLED(CONFIG_RISCV_SLOW_VECTOR_UNALIGNED_ACCESS))
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
+
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+}
+#endif
+
static void hwprobe_one_pair(struct riscv_hwprobe *pair,
const struct cpumask *cpus)
{
@@ -229,6 +269,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
pair->value = hwprobe_misaligned(cpus);
break;
+ case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:
+ pair->value = hwprobe_vec_misaligned(cpus);
+ break;
+
case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
pair->value = 0;
if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
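For context, the new key is queried from userspace through the riscv_hwprobe() syscall like the existing ones. A minimal sketch, assuming installed uapi headers that already carry the RISCV_HWPROBE_MISALIGNED_VECTOR_* definitions added by this series:

/* Query the vector misaligned-access performance reported by the kernel. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>
#include <asm/unistd.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
	};

	/* One pair, all online CPUs (cpusetsize = 0, cpus = NULL), no flags. */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
		return 1;

	if (pair.key < 0) {	/* kernel predates this key */
		puts("vector misaligned accesses: unknown");
		return 0;
	}

	switch (pair.value) {
	case RISCV_HWPROBE_MISALIGNED_VECTOR_FAST:
		puts("vector misaligned accesses: fast");
		break;
	case RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW:
		puts("vector misaligned accesses: slow");
		break;
	case RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED:
		puts("vector misaligned accesses: unsupported");
		break;
	default:
		puts("vector misaligned accesses: unknown");
	}

	return 0;
}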
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 1b9867136b61..7cc108aed74e 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -16,6 +16,7 @@
#include <asm/entry-common.h>
#include <asm/hwprobe.h>
#include <asm/cpufeature.h>
+#include <asm/vector.h>
#define INSN_MATCH_LB 0x3
#define INSN_MASK_LB 0x707f
@@ -320,12 +321,37 @@ union reg_data {
u64 data_u64;
};
-static bool unaligned_ctl __read_mostly;
-
/* sysctl hooks */
int unaligned_enabled __read_mostly = 1; /* Enabled by default */
-int handle_misaligned_load(struct pt_regs *regs)
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+ unsigned long epc = regs->epc;
+ unsigned long insn;
+
+ if (get_insn(regs, epc, &insn))
+ return -1;
+
+ /* Only return 0 when in check_vector_unaligned_access_emulated */
+ if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+ *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+ regs->epc = epc + INSN_LEN(insn);
+ return 0;
+ }
+
+	/* If it is a vector instruction, we don't emulate it yet */
+ regs->epc = epc;
+ return -1;
+}
+#else
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+ return -1;
+}
+#endif
+
+static int handle_scalar_misaligned_load(struct pt_regs *regs)
{
union reg_data val;
unsigned long epc = regs->epc;
@@ -433,7 +459,7 @@ int handle_misaligned_load(struct pt_regs *regs)
return 0;
}
-int handle_misaligned_store(struct pt_regs *regs)
+static int handle_scalar_misaligned_store(struct pt_regs *regs)
{
union reg_data val;
unsigned long epc = regs->epc;
@@ -524,11 +550,96 @@ int handle_misaligned_store(struct pt_regs *regs)
return 0;
}
-static bool check_unaligned_access_emulated(int cpu)
+int handle_misaligned_load(struct pt_regs *regs)
+{
+ unsigned long epc = regs->epc;
+ unsigned long insn;
+
+ if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) {
+ if (get_insn(regs, epc, &insn))
+ return -1;
+
+ if (insn_is_vector(insn))
+ return handle_vector_misaligned_load(regs);
+ }
+
+ if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+ return handle_scalar_misaligned_load(regs);
+
+ return -1;
+}
+
+int handle_misaligned_store(struct pt_regs *regs)
{
+ if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+ return handle_scalar_misaligned_store(regs);
+
+ return -1;
+}
+
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused)
+{
+ long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
+ unsigned long tmp_var;
+
+ *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+ kernel_vector_begin();
+ /*
+ * In pre-13.0.0 versions of GCC, vector registers cannot appear in
+ * the clobber list. This inline asm clobbers v0, but since we do not
+ * currently build the kernel with V enabled, the v0 clobber arg is not
+ * needed (as the compiler will not emit vector code itself). If the kernel
+ * is changed to build with V enabled, the clobber arg will need to be
+ * added here.
+ */
+ __asm__ __volatile__ (
+ ".balign 4\n\t"
+ ".option push\n\t"
+ ".option arch, +zve32x\n\t"
+ " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b
+ " vle16.v v0, (%[ptr])\n\t" // Load bytes
+ ".option pop\n\t"
+ : : [ptr] "r" ((u8 *)&tmp_var + 1));
+ kernel_vector_end();
+}
+
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+ int cpu;
+
+ if (!has_vector()) {
+ for_each_online_cpu(cpu)
+ per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+ return false;
+ }
+
+ schedule_on_each_cpu(check_vector_unaligned_access_emulated);
+
+ for_each_online_cpu(cpu)
+ if (per_cpu(vector_misaligned_access, cpu)
+ == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+ return false;
+
+ return true;
+}
+#else
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+ return false;
+}
+#endif
+
+#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
+
+static bool unaligned_ctl __read_mostly;
+
+void check_unaligned_access_emulated(struct work_struct *work __always_unused)
+{
+ int cpu = smp_processor_id();
long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
unsigned long tmp_var, tmp_val;
- bool misaligned_emu_detected;
*mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
@@ -536,19 +647,16 @@ static bool check_unaligned_access_emulated(int cpu)
" "REG_L" %[tmp], 1(%[ptr])\n"
: [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
- misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED);
/*
* If unaligned_ctl is already set, this means that we detected that all
 * CPUs use emulated misaligned accesses at boot time. If that changed
 * when hotplugging the new CPU, this is something we don't handle.
*/
- if (unlikely(unaligned_ctl && !misaligned_emu_detected)) {
+ if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) {
pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
while (true)
cpu_relax();
}
-
- return misaligned_emu_detected;
}
bool check_unaligned_access_emulated_all_cpus(void)
@@ -560,8 +668,11 @@ bool check_unaligned_access_emulated_all_cpus(void)
* accesses emulated since tasks requesting such control can run on any
* CPU.
*/
+ schedule_on_each_cpu(check_unaligned_access_emulated);
+
for_each_online_cpu(cpu)
- if (!check_unaligned_access_emulated(cpu))
+ if (per_cpu(misaligned_access_speed, cpu)
+ != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED)
return false;
unaligned_ctl = true;
@@ -572,3 +683,9 @@ bool unaligned_ctl_available(void)
{
return unaligned_ctl;
}
+#else
+bool check_unaligned_access_emulated_all_cpus(void)
+{
+ return false;
+}
+#endif
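The structural change in this file is that the emulation probes are no longer called with an explicit CPU number; they are work functions run everywhere via schedule_on_each_cpu() that record a per-CPU verdict, which the *_all_cpus() helpers then aggregate. A generic sketch of that pattern; do_probe(), EXPECTED and the probe_* names are placeholders, not kernel symbols:

/* Sketch of the per-CPU probe-and-aggregate pattern used above. */
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/workqueue.h>

static DEFINE_PER_CPU(long, probe_result);

static void probe_one_cpu(struct work_struct *work)
{
	/* Runs once on each CPU; store the verdict in per-CPU state. */
	*this_cpu_ptr(&probe_result) = do_probe();	/* do_probe() is a placeholder */
}

static bool probe_all_cpus(void)
{
	int cpu;

	/* Queue the work on every CPU and wait for all of it to finish. */
	schedule_on_each_cpu(probe_one_cpu);

	for_each_online_cpu(cpu)
		if (per_cpu(probe_result, cpu) != EXPECTED)	/* EXPECTED is a placeholder */
			return false;

	return true;
}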
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 160628a2116d..91f189cf1611 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -6,11 +6,13 @@
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/jump_label.h>
+#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
+#include <asm/vector.h>
#include "copy-unaligned.h"
@@ -19,7 +21,8 @@
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
-DEFINE_PER_CPU(long, misaligned_access_speed);
+DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
static cpumask_t fast_misaligned_access;
@@ -191,6 +194,7 @@ static int riscv_online_cpu(unsigned int cpu)
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN)
goto exit;
+ check_unaligned_access_emulated(NULL);
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
@@ -259,23 +263,159 @@ out:
kfree(bufs);
return 0;
}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+ return 0;
+}
+#endif
-static int check_unaligned_access_all_cpus(void)
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+static void check_vector_unaligned_access(struct work_struct *work __always_unused)
{
- bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+ int cpu = smp_processor_id();
+ u64 start_cycles, end_cycles;
+ u64 word_cycles;
+ u64 byte_cycles;
+ int ratio;
+ unsigned long start_jiffies, now;
+ struct page *page;
+ void *dst;
+ void *src;
+ long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
- if (!all_cpus_emulated)
- return check_unaligned_access_speed_all_cpus();
+ if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+ return;
+
+ page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!page) {
+ pr_warn("Allocation failure, not measuring vector misaligned performance\n");
+ return;
+ }
+
+ /* Make an unaligned destination buffer. */
+ dst = (void *)((unsigned long)page_address(page) | 0x1);
+ /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+ src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+ src += 2;
+ word_cycles = -1ULL;
+
+ /* Do a warmup. */
+ kernel_vector_begin();
+ __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ /*
+ * For a fixed amount of time, repeatedly try the function, and take
+ * the best time in cycles as the measurement.
+ */
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+		/* Ensure the CSR read can't reorder with respect to the copy. */
+ mb();
+ __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < word_cycles)
+ word_cycles = end_cycles - start_cycles;
+ }
+
+ byte_cycles = -1ULL;
+ __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+		/* Ensure the CSR read can't reorder with respect to the copy. */
+ mb();
+ __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < byte_cycles)
+ byte_cycles = end_cycles - start_cycles;
+ }
+
+ kernel_vector_end();
+
+ /* Don't divide by zero. */
+ if (!word_cycles || !byte_cycles) {
+ pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n",
+ cpu);
+
+ return;
+ }
+
+ if (word_cycles < byte_cycles)
+ speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
+
+ ratio = div_u64((byte_cycles * 100), word_cycles);
+ pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n",
+ cpu,
+ ratio / 100,
+ ratio % 100,
+ (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow");
+
+ per_cpu(vector_misaligned_access, cpu) = speed;
+}
+
+static int riscv_online_cpu_vec(unsigned int cpu)
+{
+ if (!has_vector())
+ return 0;
+
+ if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
+ return 0;
+
+ check_vector_unaligned_access_emulated(NULL);
+ check_vector_unaligned_access(NULL);
return 0;
}
-#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int check_unaligned_access_all_cpus(void)
+
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
- check_unaligned_access_emulated_all_cpus();
+ schedule_on_each_cpu(check_vector_unaligned_access);
+
+ /*
+ * Setup hotplug callbacks for any new CPUs that come online or go
+ * offline.
+ */
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+ riscv_online_cpu_vec, NULL);
return 0;
}
+#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+ return 0;
+}
#endif
+static int check_unaligned_access_all_cpus(void)
+{
+ bool all_cpus_emulated, all_cpus_vec_unsupported;
+
+ all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+ all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus();
+
+ if (!all_cpus_vec_unsupported &&
+ IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
+ kthread_run(vec_check_unaligned_access_speed_all_cpus,
+ NULL, "vec_check_unaligned_access_speed_all_cpus");
+ }
+
+ if (!all_cpus_emulated)
+ return check_unaligned_access_speed_all_cpus();
+
+ return 0;
+}
+
arch_initcall(check_unaligned_access_all_cpus);
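The measurement idiom shared by check_vector_unaligned_access() and the existing scalar probe is: wait for a jiffies edge so every candidate gets an equal window, then keep the best (lowest) cycle count, which filters out interrupts and other noise. Isolated here as a sketch; best_cycles(), fn() and PROBE_JIFFIES_LG2 are placeholders:

/* Sketch of the best-of-window timing idiom; fn() is a placeholder workload. */
#include <linux/jiffies.h>
#include <linux/processor.h>
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/timex.h>

#define PROBE_JIFFIES_LG2 1	/* plays the role of MISALIGNED_ACCESS_JIFFIES_LG2 */

static u64 best_cycles(void (*fn)(void))
{
	u64 best = -1ULL, start, end;
	unsigned long start_jiffies, now;

	/* Start on a jiffies edge so every candidate gets the same window. */
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	while (time_before(jiffies, now + (1 << PROBE_JIFFIES_LG2))) {
		start = get_cycles64();
		mb();		/* keep the first cycle read before the workload */
		fn();
		mb();		/* and the workload before the second read */
		end = get_cycles64();
		if (end - start < best)
			best = end - start;
	}

	return best;	/* least-disturbed run wins */
}

Comparing such best-of measurements for the word and byte copies is what decides FAST versus SLOW above.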
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 3f1c4b2d0b06..9a1b555e8733 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -45,7 +45,7 @@ $(obj)/vdso.o: $(obj)/vdso.so
# link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
- $(call if_changed,vdsold)
+ $(call if_changed,vdsold_and_check)
LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \
--build-id=sha1 --hash-style=both --eh-frame-hdr
@@ -65,7 +65,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
# actual build commands
# The DSO images are built using a special linker script
# Make sure only to export the intended __vdso_xxx symbol offsets.
-quiet_cmd_vdsold = VDSOLD $@
- cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \
+quiet_cmd_vdsold_and_check = VDSOLD $@
+ cmd_vdsold_and_check = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \
$(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
- rm $@.tmp
+ rm $@.tmp && \
+ $(cmd_vdso_check)
diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S
new file mode 100644
index 000000000000..d16f19f1b3b6
--- /dev/null
+++ b/arch/riscv/kernel/vec-copy-unaligned.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2024 Rivos Inc. */
+
+#include <linux/args.h>
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ .text
+
+#define WORD_EEW 32
+
+#define WORD_SEW CONCATENATE(e, WORD_EEW)
+#define VEC_L CONCATENATE(vle, WORD_EEW).v
+#define VEC_S CONCATENATE(vse, WORD_EEW).v
+
+/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */
+/* Performs a memcpy without aligning buffers, using word loads and stores. */
+/* Note: The size is truncated to a multiple of WORD_EEW */
+SYM_FUNC_START(__riscv_copy_vec_words_unaligned)
+ andi a4, a2, ~(WORD_EEW-1)
+ beqz a4, 2f
+ add a3, a1, a4
+ .option push
+ .option arch, +zve32x
+1:
+ vsetivli t0, 8, WORD_SEW, m8, ta, ma
+ VEC_L v0, (a1)
+ VEC_S v0, (a0)
+ addi a0, a0, WORD_EEW
+ addi a1, a1, WORD_EEW
+ bltu a1, a3, 1b
+
+2:
+ .option pop
+ ret
+SYM_FUNC_END(__riscv_copy_vec_words_unaligned)
+
+/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */
+/* Performs a memcpy without aligning buffers, using only byte accesses. */
+/* Note: The size is truncated to a multiple of 8 */
+SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned)
+ andi a4, a2, ~(8-1)
+ beqz a4, 2f
+ add a3, a1, a4
+ .option push
+ .option arch, +zve32x
+1:
+ vsetivli t0, 8, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vse8.v v0, (a0)
+ addi a0, a0, 8
+ addi a1, a1, 8
+ bltu a1, a3, 1b
+
+2:
+ .option pop
+ ret
+SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned)
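For reference: the C-side prototypes matching the comments above are declared in arch/riscv/kernel/copy-unaligned.h, and callers must own the vector unit while the routines run, exactly as unaligned_access_speed.c does. A sketch; vec_copy_unaligned() is a hypothetical wrapper:

/* Sketch of how the assembly helpers above are driven from C. */
#include <linux/types.h>
#include <asm/vector.h>

void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size);
void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size);

static void vec_copy_unaligned(void *dst, const void *src, size_t size)
{
	kernel_vector_begin();		/* claim the vector unit */
	/* The assembly truncates size to a multiple of WORD_EEW bytes. */
	__riscv_copy_vec_words_unaligned(dst, src, size);
	kernel_vector_end();		/* release it */
}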
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 682b3feee451..821818886fab 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -66,7 +66,7 @@ void __init riscv_v_setup_ctx_cache(void)
#endif
}
-static bool insn_is_vector(u32 insn_buf)
+bool insn_is_vector(u32 insn_buf)
{
u32 opcode = insn_buf & __INSN_OPCODE_MASK;
u32 width, csr;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index b319c4c13c54..5b68490ad9b7 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -34,9 +34,11 @@ static const unsigned long kvm_isa_ext_arr[] = {
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
/* Multi letter extensions (alphabetically sorted) */
+ [KVM_RISCV_ISA_EXT_SMNPM] = RISCV_ISA_EXT_SSNPM,
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSCOFPMF),
+ KVM_ISA_EXT_ARR(SSNPM),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@@ -127,8 +129,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
+ case KVM_RISCV_ISA_EXT_SMNPM:
/* There is no architectural config bit to disable sscofpmf completely */
case KVM_RISCV_ISA_EXT_SSCOFPMF:
+ case KVM_RISCV_ISA_EXT_SSNPM:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT: