summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/crypto/aes-glue.c50
-rw-r--r--arch/arm64/crypto/aes-modes.S165
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c1
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c2
-rw-r--r--arch/arm64/crypto/sha3-ce-glue.c4
-rw-r--r--arch/arm64/crypto/sha512-ce-glue.c2
6 files changed, 148 insertions, 76 deletions
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 34b8a89197be..e7f116d833b9 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -24,6 +24,7 @@
#ifdef USE_V8_CRYPTO_EXTENSIONS
#define MODE "ce"
#define PRIO 300
+#define STRIDE 5
#define aes_expandkey ce_aes_expandkey
#define aes_ecb_encrypt ce_aes_ecb_encrypt
#define aes_ecb_decrypt ce_aes_ecb_decrypt
@@ -41,6 +42,7 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
#define PRIO 200
+#define STRIDE 4
#define aes_ecb_encrypt neon_aes_ecb_encrypt
#define aes_ecb_decrypt neon_aes_ecb_decrypt
#define aes_cbc_encrypt neon_aes_cbc_encrypt
@@ -55,7 +57,7 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
#endif
-#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS)
+#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
@@ -87,7 +89,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int bytes, u8 const iv[]);
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
- int rounds, int blocks, u8 ctr[]);
+ int rounds, int bytes, u8 ctr[], u8 finalbuf[]);
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int bytes, u32 const rk2[], u8 iv[],
@@ -448,34 +450,36 @@ static int ctr_encrypt(struct skcipher_request *req)
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, rounds = 6 + ctx->key_length / 4;
struct skcipher_walk walk;
- int blocks;
err = skcipher_walk_virt(&walk, req, false);
- while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, rounds, blocks, walk.iv);
- kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
- }
- if (walk.nbytes) {
- u8 __aligned(8) tail[AES_BLOCK_SIZE];
+ while (walk.nbytes > 0) {
+ const u8 *src = walk.src.virt.addr;
unsigned int nbytes = walk.nbytes;
- u8 *tdst = walk.dst.virt.addr;
- u8 *tsrc = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+ unsigned int tail;
- /*
- * Tell aes_ctr_encrypt() to process a tail block.
- */
- blocks = -1;
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ src = memcpy(buf, src, nbytes);
+ else if (nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
- aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds,
- blocks, walk.iv);
+ aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+ walk.iv, buf);
kernel_neon_end();
- crypto_xor_cpy(tdst, tsrc, tail, nbytes);
- err = skcipher_walk_done(&walk, 0);
+
+ tail = nbytes % (STRIDE * AES_BLOCK_SIZE);
+ if (tail > 0 && tail < AES_BLOCK_SIZE)
+ /*
+ * The final partial block could not be returned using
+ * an overlapping store, so it was passed via buf[]
+ * instead.
+ */
+ memcpy(dst + nbytes - tail, buf, tail);
+
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
@@ -650,7 +654,7 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
}
static struct skcipher_alg aes_algs[] = { {
-#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS)
+#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
.base = {
.cra_name = "__ecb(aes)",
.cra_driver_name = "__ecb-aes-" MODE,
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index cf618d8f6cec..3d1f97799899 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -321,42 +321,76 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int blocks, u8 ctr[])
+ * int bytes, u8 ctr[], u8 finalbuf[])
*/
AES_FUNC_START(aes_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
- enc_prepare w3, x2, x6
+ enc_prepare w3, x2, x12
ld1 {vctr.16b}, [x5]
- umov x6, vctr.d[1] /* keep swabbed ctr in reg */
- rev x6, x6
- cmn w6, w4 /* 32 bit overflow? */
- bcs .Lctrloop
+ umov x12, vctr.d[1] /* keep swabbed ctr in reg */
+ rev x12, x12
+
.LctrloopNx:
- subs w4, w4, #MAX_STRIDE
- bmi .Lctr1x
- add w7, w6, #1
+ add w7, w4, #15
+ sub w4, w4, #MAX_STRIDE << 4
+ lsr w7, w7, #4
+ mov w8, #MAX_STRIDE
+ cmp w7, w8
+ csel w7, w7, w8, lt
+ adds x12, x12, x7
+
mov v0.16b, vctr.16b
- add w8, w6, #2
mov v1.16b, vctr.16b
- add w9, w6, #3
mov v2.16b, vctr.16b
- add w9, w6, #3
- rev w7, w7
mov v3.16b, vctr.16b
- rev w8, w8
ST5( mov v4.16b, vctr.16b )
- mov v1.s[3], w7
- rev w9, w9
-ST5( add w10, w6, #4 )
- mov v2.s[3], w8
-ST5( rev w10, w10 )
- mov v3.s[3], w9
-ST5( mov v4.s[3], w10 )
- ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
+ bcs 0f
+
+ .subsection 1
+ /* apply carry to outgoing counter */
+0: umov x8, vctr.d[0]
+ rev x8, x8
+ add x8, x8, #1
+ rev x8, x8
+ ins vctr.d[0], x8
+
+ /* apply carry to N counter blocks for N := x12 */
+ adr x16, 1f
+ sub x16, x16, x12, lsl #3
+ br x16
+ hint 34 // bti c
+ mov v0.d[0], vctr.d[0]
+ hint 34 // bti c
+ mov v1.d[0], vctr.d[0]
+ hint 34 // bti c
+ mov v2.d[0], vctr.d[0]
+ hint 34 // bti c
+ mov v3.d[0], vctr.d[0]
+ST5( hint 34 )
+ST5( mov v4.d[0], vctr.d[0] )
+1: b 2f
+ .previous
+
+2: rev x7, x12
+ ins vctr.d[1], x7
+ sub x7, x12, #MAX_STRIDE - 1
+ sub x8, x12, #MAX_STRIDE - 2
+ sub x9, x12, #MAX_STRIDE - 3
+ rev x7, x7
+ rev x8, x8
+ mov v1.d[1], x7
+ rev x9, x9
+ST5( sub x10, x12, #MAX_STRIDE - 4 )
+ mov v2.d[1], x8
+ST5( rev x10, x10 )
+ mov v3.d[1], x9
+ST5( mov v4.d[1], x10 )
+ tbnz w4, #31, .Lctrtail
+ ld1 {v5.16b-v7.16b}, [x1], #48
ST4( bl aes_encrypt_block4x )
ST5( bl aes_encrypt_block5x )
eor v0.16b, v5.16b, v0.16b
@@ -368,47 +402,72 @@ ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
ST5( eor v4.16b, v6.16b, v4.16b )
st1 {v0.16b-v3.16b}, [x0], #64
ST5( st1 {v4.16b}, [x0], #16 )
- add x6, x6, #MAX_STRIDE
- rev x7, x6
- ins vctr.d[1], x7
cbz w4, .Lctrout
b .LctrloopNx
-.Lctr1x:
- adds w4, w4, #MAX_STRIDE
- beq .Lctrout
-.Lctrloop:
- mov v0.16b, vctr.16b
- encrypt_block v0, w3, x2, x8, w7
-
- adds x6, x6, #1 /* increment BE ctr */
- rev x7, x6
- ins vctr.d[1], x7
- bcs .Lctrcarry /* overflow? */
-
-.Lctrcarrydone:
- subs w4, w4, #1
- bmi .Lctrtailblock /* blocks <0 means tail block */
- ld1 {v3.16b}, [x1], #16
- eor v3.16b, v0.16b, v3.16b
- st1 {v3.16b}, [x0], #16
- bne .Lctrloop
.Lctrout:
st1 {vctr.16b}, [x5] /* return next CTR value */
ldp x29, x30, [sp], #16
ret
-.Lctrtailblock:
- st1 {v0.16b}, [x0]
+.Lctrtail:
+ /* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
+ mov x16, #16
+ ands x13, x4, #0xf
+ csel x13, x13, x16, ne
+
+ST5( cmp w4, #64 - (MAX_STRIDE << 4) )
+ST5( csel x14, x16, xzr, gt )
+ cmp w4, #48 - (MAX_STRIDE << 4)
+ csel x15, x16, xzr, gt
+ cmp w4, #32 - (MAX_STRIDE << 4)
+ csel x16, x16, xzr, gt
+ cmp w4, #16 - (MAX_STRIDE << 4)
+ ble .Lctrtail1x
+
+ adr_l x12, .Lcts_permute_table
+ add x12, x12, x13
+
+ST5( ld1 {v5.16b}, [x1], x14 )
+ ld1 {v6.16b}, [x1], x15
+ ld1 {v7.16b}, [x1], x16
+
+ST4( bl aes_encrypt_block4x )
+ST5( bl aes_encrypt_block5x )
+
+ ld1 {v8.16b}, [x1], x13
+ ld1 {v9.16b}, [x1]
+ ld1 {v10.16b}, [x12]
+
+ST4( eor v6.16b, v6.16b, v0.16b )
+ST4( eor v7.16b, v7.16b, v1.16b )
+ST4( tbl v3.16b, {v3.16b}, v10.16b )
+ST4( eor v8.16b, v8.16b, v2.16b )
+ST4( eor v9.16b, v9.16b, v3.16b )
+
+ST5( eor v5.16b, v5.16b, v0.16b )
+ST5( eor v6.16b, v6.16b, v1.16b )
+ST5( tbl v4.16b, {v4.16b}, v10.16b )
+ST5( eor v7.16b, v7.16b, v2.16b )
+ST5( eor v8.16b, v8.16b, v3.16b )
+ST5( eor v9.16b, v9.16b, v4.16b )
+
+ST5( st1 {v5.16b}, [x0], x14 )
+ st1 {v6.16b}, [x0], x15
+ st1 {v7.16b}, [x0], x16
+ add x13, x13, x0
+ st1 {v9.16b}, [x13] // overlapping stores
+ st1 {v8.16b}, [x0]
b .Lctrout
-.Lctrcarry:
- umov x7, vctr.d[0] /* load upper word of ctr */
- rev x7, x7 /* ... to handle the carry */
- add x7, x7, #1
- rev x7, x7
- ins vctr.d[0], x7
- b .Lctrcarrydone
+.Lctrtail1x:
+ csel x0, x0, x6, eq // use finalbuf if less than a full block
+ ld1 {v5.16b}, [x1]
+ST5( mov v3.16b, v4.16b )
+ encrypt_block v3, w3, x2, x8, w7
+ eor v5.16b, v5.16b, v3.16b
+ st1 {v5.16b}, [x0]
+ b .Lctrout
AES_FUNC_END(aes_ctr_encrypt)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index c93121bcfdeb..c1362861765f 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -19,6 +19,7 @@
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha1");
struct sha1_ce_state {
struct sha1_state sst;
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 31ba3da5e61b..ded3a6488f81 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -19,6 +19,8 @@
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha256");
struct sha256_ce_state {
struct sha256_state sst;
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index e5a2936f0886..7288d3046354 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -23,6 +23,10 @@
MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha3-224");
+MODULE_ALIAS_CRYPTO("sha3-256");
+MODULE_ALIAS_CRYPTO("sha3-384");
+MODULE_ALIAS_CRYPTO("sha3-512");
asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks,
int md_len);
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index faa83f6cf376..a6b1adf31c56 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -23,6 +23,8 @@
MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
int blocks);