summaryrefslogtreecommitdiff
path: root/arch/riscv/include/asm/cmpxchg.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv/include/asm/cmpxchg.h')
-rw-r--r--arch/riscv/include/asm/cmpxchg.h286
1 files changed, 208 insertions, 78 deletions
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ebbce134917c..4cadc56220fe 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -12,30 +12,43 @@
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
-
-#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
-({ \
- u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
- ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
- ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
- << __s; \
- ulong __newx = (ulong)(n) << __s; \
- ulong __retx; \
- ulong __rc; \
- \
- __asm__ __volatile__ ( \
- prepend \
- "0: lr.w %0, %2\n" \
- " and %1, %0, %z4\n" \
- " or %1, %1, %z3\n" \
- " sc.w" sc_sfx " %1, %1, %2\n" \
- " bnez %1, 0b\n" \
- append \
- : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" (__newx), "rJ" (~__mask) \
- : "memory"); \
- \
- r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+#include <asm/cpufeature-macros.h>
+
+#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
+ swap_append, r, p, n) \
+({ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amoswap" swap_sfx " %0, %z2, %1\n" \
+ swap_append \
+ : "=&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+ << __s; \
+ ulong __newx = (ulong)(n) << __s; \
+ ulong __retx; \
+ ulong __rc; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ "0: lr.w %0, %2\n" \
+ " and %1, %0, %z4\n" \
+ " or %1, %1, %z3\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
+ " bnez %1, 0b\n" \
+ sc_append \
+ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+ : "rJ" (__newx), "rJ" (~__mask) \
+ : "memory"); \
+ \
+ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ } \
})
#define __arch_xchg(sfx, prepend, append, r, p, n) \
@@ -58,8 +71,13 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
+ prepend, sc_append, swap_append, \
+ __ret, __ptr, __new); \
+ break; \
case 2: \
- __arch_xchg_masked(sc_sfx, prepend, sc_append, \
+ __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
+ prepend, sc_append, swap_append, \
__ret, __ptr, __new); \
break; \
case 4: \
@@ -106,55 +124,90 @@
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
-
-#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
-({ \
- u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
- ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
- ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
- << __s; \
- ulong __newx = (ulong)(n) << __s; \
- ulong __oldx = (ulong)(o) << __s; \
- ulong __retx; \
- ulong __rc; \
- \
- __asm__ __volatile__ ( \
- prepend \
- "0: lr.w %0, %2\n" \
- " and %1, %0, %z5\n" \
- " bne %1, %z3, 1f\n" \
- " and %1, %0, %z6\n" \
- " or %1, %1, %z4\n" \
- " sc.w" sc_sfx " %1, %1, %2\n" \
- " bnez %1, 0b\n" \
- append \
- "1:\n" \
- : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" ((long)__oldx), "rJ" (__newx), \
- "rJ" (__mask), "rJ" (~__mask) \
- : "memory"); \
- \
- r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ r, p, o, n) \
+({ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
+ r = o; \
+ \
+ __asm__ __volatile__ ( \
+ cas_prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ cas_append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+ << __s; \
+ ulong __newx = (ulong)(n) << __s; \
+ ulong __oldx = (ulong)(o) << __s; \
+ ulong __retx; \
+ ulong __rc; \
+ \
+ __asm__ __volatile__ ( \
+ sc_prepend \
+ "0: lr.w %0, %2\n" \
+ " and %1, %0, %z5\n" \
+ " bne %1, %z3, 1f\n" \
+ " and %1, %0, %z6\n" \
+ " or %1, %1, %z4\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
+ " bnez %1, 0b\n" \
+ sc_append \
+ "1:\n" \
+ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+ : "rJ" ((long)__oldx), "rJ" (__newx), \
+ "rJ" (__mask), "rJ" (~__mask) \
+ : "memory"); \
+ \
+ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ } \
})
-#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
+#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ r, p, co, o, n) \
({ \
- register unsigned int __rc; \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
+ r = o; \
\
- __asm__ __volatile__ ( \
- prepend \
- "0: lr" lr_sfx " %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc" sc_sfx " %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- append \
- "1:\n" \
- : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
- : "rJ" (co o), "rJ" (n) \
- : "memory"); \
+ __asm__ __volatile__ ( \
+ cas_prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ cas_append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ register unsigned int __rc; \
+ \
+ __asm__ __volatile__ ( \
+ sc_prepend \
+ "0: lr" lr_sfx " %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc" sc_sfx " %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ sc_append \
+ "1:\n" \
+ : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
+ : "rJ" (co o), "rJ" (n) \
+ : "memory"); \
+ } \
})
-#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
+#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __old = (old); \
@@ -163,17 +216,28 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, __old, __new); \
+ break; \
case 2: \
- __arch_cmpxchg_masked(sc_sfx, prepend, append, \
- __ret, __ptr, __old, __new); \
+ __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, __old, __new); \
break; \
case 4: \
- __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
- __ret, __ptr, (long), __old, __new); \
+ __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, (long), __old, __new); \
break; \
case 8: \
- __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \
- __ret, __ptr, /**/, __old, __new); \
+ __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, /**/, __old, __new); \
break; \
default: \
BUILD_BUG(); \
@@ -181,17 +245,40 @@
(__typeof__(*(__ptr)))__ret; \
})
+/*
+ * These macros are here to improve the readability of the arch_cmpxchg_XXX()
+ * macros.
+ */
+#define SC_SFX(x) x
+#define CAS_SFX(x) x
+#define SC_PREPEND(x) x
+#define SC_APPEND(x) x
+#define CAS_PREPEND(x) x
+#define CAS_APPEND(x) x
+
#define arch_cmpxchg_relaxed(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", "", "")
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(""), SC_APPEND(""), \
+ CAS_PREPEND(""), CAS_APPEND(""))
#define arch_cmpxchg_acquire(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \
+ CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))
#define arch_cmpxchg_release(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \
+ CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))
#define arch_cmpxchg(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
+ _arch_cmpxchg((ptr), (o), (n), \
+ SC_SFX(".rl"), CAS_SFX(".aqrl"), \
+ SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
+ CAS_PREPEND(""), CAS_APPEND(""))
#define arch_cmpxchg_local(ptr, o, n) \
arch_cmpxchg_relaxed((ptr), (o), (n))
@@ -226,6 +313,44 @@
arch_cmpxchg_release((ptr), (o), (n)); \
})
+#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)
+
+#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
+
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __arch_cmpxchg128(p, o, n, cas_sfx) \
+({ \
+ __typeof__(*(p)) __o = (o); \
+ union __u128_halves __hn = { .full = (n) }; \
+ union __u128_halves __ho = { .full = (__o) }; \
+ register unsigned long t1 asm ("t1") = __hn.low; \
+ register unsigned long t2 asm ("t2") = __hn.high; \
+ register unsigned long t3 asm ("t3") = __ho.low; \
+ register unsigned long t4 asm ("t4") = __ho.high; \
+ \
+ __asm__ __volatile__ ( \
+ " amocas.q" cas_sfx " %0, %z3, %2" \
+ : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
+ : "rJ" (t1), "rJ" (t2) \
+ : "memory"); \
+ \
+ ((u128)t4 << 64) | t3; \
+})
+
+#define arch_cmpxchg128(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), ".aqrl")
+
+#define arch_cmpxchg128_local(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), "")
+
+#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
+
#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
* Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
@@ -245,6 +370,11 @@ static __always_inline void __cmpwait(volatile void *ptr,
: : : : no_zawrs);
switch (size) {
+ case 1:
+ fallthrough;
+ case 2:
+ /* RISC-V doesn't have lr instructions on byte and half-word. */
+ goto no_zawrs;
case 4:
asm volatile(
" lr.w %0, %1\n"