summaryrefslogtreecommitdiff
path: root/arch/loongarch
diff options
context:
space:
mode:
author: Xi Ruoyao <xry111@xry111.site> 2024-05-14 12:24:18 +0800
committer: Huacai Chen <chenhuacai@loongson.cn> 2024-05-14 12:24:18 +0800
commit: 5125d033c8af733ee4d52e3e3c6ebf5784976e46 (patch)
tree: a30781451050630b3d334e34dbc967e757135198 /arch/loongarch
parent: 2cce9059599143aa950b0baaf2523b17ab47d27d (diff)
LoongArch: Select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
This allows compiling a full 128-bit product of two 64-bit integers as a
mul/mulh pair, instead of a nasty long sequence of 20+ instructions.

However, after selecting ARCH_SUPPORTS_INT128, when optimizing for size
the compiler generates calls to __ashlti3, __ashrti3, and __lshrti3 for
shifting __int128 values, causing a link failure:

    loongarch64-unknown-linux-gnu-ld: kernel/sched/fair.o: in function `mul_u64_u32_shr':
    <PATH>/include/linux/math64.h:161:(.text+0x5e4): undefined reference to `__lshrti3'

So provide the implementation of these functions if ARCH_SUPPORTS_INT128.

Closes: https://lore.kernel.org/loongarch/CAAhV-H5EZ=7OF7CSiYyZ8_+wWuenpo=K2WT8-6mAT4CvzUC_4g@mail.gmail.com/
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Diffstat (limited to 'arch/loongarch')
-rw-r--r-- arch/loongarch/Kconfig | 1
-rw-r--r-- arch/loongarch/include/asm/asm-prototypes.h | 6
-rw-r--r-- arch/loongarch/lib/Makefile | 2
-rw-r--r-- arch/loongarch/lib/tishift.S | 56
4 files changed, 65 insertions, 0 deletions
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 1355ec0c69cc..335a98b28167 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -57,6 +57,7 @@ config LOONGARCH
select ARCH_SUPPORTS_ACPI
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_HUGETLBFS
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
select ARCH_SUPPORTS_NUMA_BALANCING
diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index cf8e1a4e7c19..51f224bcfc65 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -6,3 +6,9 @@
#include <asm/page.h>
#include <asm/ftrace.h>
#include <asm-generic/asm-prototypes.h>
+
+#ifdef CONFIG_ARCH_SUPPORTS_INT128
+__int128_t __ashlti3(__int128_t a, int b); /* a << b */
+__int128_t __ashrti3(__int128_t a, int b); /* a >> b, arithmetic (sign-filling) */
+__int128_t __lshrti3(__int128_t a, int b); /* a >> b, logical (zero-filling) */
+#endif /* CONFIG_ARCH_SUPPORTS_INT128 */
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index a77bf160bfc4..ccea3bbd4353 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -6,6 +6,8 @@
lib-y += delay.o memset.o memcpy.o memmove.o \
clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
+obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
+
obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/tishift.S b/arch/loongarch/lib/tishift.S
new file mode 100644
index 000000000000..fa1d310012bc
--- /dev/null
+++ b/arch/loongarch/lib/tishift.S
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/asmmacro.h>
+#include <linux/export.h>
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__ashlti3)      /* 128-bit left shift: a1:a0 (hi:lo) <<= a2; only bits 0-6 of a2 are examined; branchless */
+ srli.d t2, a0, 1              /* t2 = lo >> 1, pre-shifted so the next shift amount stays within 0..63 */
+ nor t3, zero, a2              /* t3 = ~a2; its low 6 bits equal 63 - (a2 & 63) */
+ sll.d t1, a1, a2              /* t1 = hi << (a2 & 63) */
+ srl.d t2, t2, t3              /* t2 = bits of lo that carry into hi; 0 when (a2 & 63) == 0 */
+ andi t0, a2, 64               /* t0 != 0 iff bit 6 of the count is set (count >= 64) */
+ sll.d a0, a0, a2              /* a0 = lo << (a2 & 63) */
+ or t1, t2, t1                 /* t1 = hi result for a count below 64 */
+ maskeqz a1, a0, t0            /* a1 = t0 ? shifted lo : 0  (count >= 64: lo moves into hi) */
+ masknez a0, a0, t0            /* a0 = t0 ? 0 : shifted lo  (count >= 64: lo becomes 0) */
+ masknez t0, t1, t0            /* t0 = t0 ? 0 : t1          (keep the <64 hi result only when it applies) */
+ or a1, t0, a1                 /* a1 = whichever hi result was kept */
+ jr ra                         /* return with the result in a1:a0 */
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)      /* 128-bit arithmetic right shift: a1:a0 (hi:lo) >>= a2; only bits 0-6 of a2 are examined; branchless */
+ nor t3, zero, a2              /* t3 = ~a2; its low 6 bits equal 63 - (a2 & 63) */
+ slli.d t2, a1, 1              /* t2 = hi << 1, pre-shifted so the next shift amount stays within 0..63 */
+ srl.d t1, a0, a2              /* t1 = lo >> (a2 & 63), logical */
+ sll.d t2, t2, t3              /* t2 = bits of hi that carry down into lo; 0 when (a2 & 63) == 0 */
+ andi t0, a2, 64               /* t0 != 0 iff bit 6 of the count is set (count >= 64) */
+ or t1, t2, t1                 /* t1 = lo result for a count below 64 */
+ sra.d a2, a1, a2              /* a2 = hi >> (a2 & 63), arithmetic; the count in a2 is not read after this */
+ srai.d a1, a1, 63             /* a1 = sign fill: all ones if hi is negative, otherwise 0 */
+ maskeqz a0, a2, t0            /* a0 = t0 ? shifted hi : 0  (count >= 64: hi moves into lo) */
+ maskeqz a1, a1, t0            /* a1 = t0 ? sign fill : 0   (count >= 64: hi becomes the sign fill) */
+ masknez a2, a2, t0            /* a2 = t0 ? 0 : shifted hi  (keep the <64 hi result only when it applies) */
+ masknez t0, t1, t0            /* t0 = t0 ? 0 : t1          (keep the <64 lo result only when it applies) */
+ or a1, a1, a2                 /* a1 = whichever hi result was kept */
+ or a0, t0, a0                 /* a0 = whichever lo result was kept */
+ jr ra                         /* return with the result in a1:a0 */
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)      /* 128-bit logical right shift: a1:a0 (hi:lo) >>= a2; only bits 0-6 of a2 are examined; branchless */
+ slli.d t2, a1, 1              /* t2 = hi << 1, pre-shifted so the next shift amount stays within 0..63 */
+ nor t3, zero, a2              /* t3 = ~a2; its low 6 bits equal 63 - (a2 & 63) */
+ srl.d t1, a0, a2              /* t1 = lo >> (a2 & 63) */
+ sll.d t2, t2, t3              /* t2 = bits of hi that carry down into lo; 0 when (a2 & 63) == 0 */
+ andi t0, a2, 64               /* t0 != 0 iff bit 6 of the count is set (count >= 64) */
+ srl.d a1, a1, a2              /* a1 = hi >> (a2 & 63), zero-filling */
+ or t1, t2, t1                 /* t1 = lo result for a count below 64 */
+ maskeqz a0, a1, t0            /* a0 = t0 ? shifted hi : 0  (count >= 64: hi moves into lo) */
+ masknez a1, a1, t0            /* a1 = t0 ? 0 : shifted hi  (count >= 64: hi becomes 0) */
+ masknez t0, t1, t0            /* t0 = t0 ? 0 : t1          (keep the <64 lo result only when it applies) */
+ or a0, t0, a0                 /* a0 = whichever lo result was kept */
+ jr ra                         /* return with the result in a1:a0 */
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)