summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- arch/powerpc/crypto/crc32-vpmsum_core.S 31
-rw-r--r-- arch/powerpc/crypto/crc32c-vpmsum_asm.S 1
2 files changed, 31 insertions, 1 deletion
diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S
index 7c6be6a5c977..aadb59c96a27 100644
--- a/arch/powerpc/crypto/crc32-vpmsum_core.S
+++ b/arch/powerpc/crypto/crc32-vpmsum_core.S
@@ -35,7 +35,9 @@
.text
-#if defined(__BIG_ENDIAN__)
+#if defined(__BIG_ENDIAN__) && defined(REFLECT)
+#define BYTESWAP_DATA
+#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
#define BYTESWAP_DATA
#else
#undef BYTESWAP_DATA
@@ -108,7 +110,11 @@ FUNC_START(CRC_FUNCTION_NAME)
/* Get the initial value into v8 */
vxor v8,v8,v8
MTVRD(v8, R3)
+#ifdef REFLECT
vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
+#else
+ vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
+#endif
#ifdef BYTESWAP_DATA
addis r3,r2,.byteswap_constant@toc@ha
@@ -354,6 +360,7 @@ FUNC_START(CRC_FUNCTION_NAME)
vxor v6,v6,v14
vxor v7,v7,v15
+#ifdef REFLECT
/*
* vpmsumd produces a 96 bit result in the least significant bits
* of the register. Since we are bit reflected we have to shift it
@@ -368,6 +375,7 @@ FUNC_START(CRC_FUNCTION_NAME)
vsldoi v5,v5,zeroes,4
vsldoi v6,v6,zeroes,4
vsldoi v7,v7,zeroes,4
+#endif
/* xor with last 1024 bits */
lvx v8,0,r4
@@ -511,13 +519,33 @@ FUNC_START(CRC_FUNCTION_NAME)
vsldoi v1,v0,v0,8
vxor v0,v0,v1 /* xor two 64 bit results together */
+#ifdef REFLECT
/* shift left one bit */
vspltisb v1,1
vsl v0,v0,v1
+#endif
vand v0,v0,mask_64bit
+#ifndef REFLECT
+ /*
+ * Now for the Barrett reduction algorithm. The idea is to calculate q,
+ * the multiple of our polynomial that we need to subtract. By
+ * doing the computation 2x bits higher (ie 64 bits) and shifting the
+ * result back down 2x bits, we round down to the nearest multiple.
+ */
+ VPMSUMD(v1,v0,const1) /* ma */
+ vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
+ VPMSUMD(v1,v1,const2) /* qn */
+ vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
/*
+ * Get the result into r3. We need to shift it left 8 bytes:
+ * V0 [ 0 1 2 X ]
+ * V0 [ 0 X 2 3 ]
+ */
+ vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
+#else
+ /*
* The reflected version of Barrett reduction. Instead of bit
* reflecting our data (which is expensive to do), we bit reflect our
* constants and our algorithm, which means the intermediate data in
@@ -537,6 +565,7 @@ FUNC_START(CRC_FUNCTION_NAME)
* V0 [ 0 X 2 3 ]
*/
vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits */
+#endif
/* Get it into r3 */
MFVRD(R3, v0)
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/crypto/crc32c-vpmsum_asm.S
index c0d080caefc1..d2bea48051a0 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_asm.S
+++ b/arch/powerpc/crypto/crc32c-vpmsum_asm.S
@@ -842,4 +842,5 @@
.octa 0x00000000000000000000000105ec76f1
#define CRC_FUNCTION_NAME __crc32c_vpmsum
+#define REFLECT
#include "crc32-vpmsum_core.S"