summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-30 12:57:10 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-30 12:57:10 -0700
commit44a7d4441181d0f2d622dc9bb512d7f5ca13f768 (patch)
treed7315b0896d47ef9e1d9ef3ebfbcb4b6f0a96d10
parent4ca4256453effb885c1688633676682529593f82 (diff)
parent6ae51ffe5e768d9e25a7f4298e2e7a058472bcc3 (diff)
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "Algorithms: - Add AES-NI/AVX/x86_64 implementation of SM4. Drivers: - Add Arm SMCCC TRNG based driver" [ And obviously a lot of random fixes and updates - Linus] * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (84 commits) crypto: sha512 - remove imaginary and mystifying clearing of variables crypto: aesni - xts_crypt() return if walk.nbytes is 0 padata: Remove repeated verbose license text crypto: ccp - Add support for new CCP/PSP device ID crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation crypto: x86/sm4 - export reusable AESNI/AVX functions crypto: rmd320 - remove rmd320 in Makefile crypto: skcipher - in_irq() cleanup crypto: hisilicon - check _PS0 and _PR0 method crypto: hisilicon - change parameter passing of debugfs function crypto: hisilicon - support runtime PM for accelerator device crypto: hisilicon - add runtime PM ops crypto: hisilicon - using 'debugfs_create_file' instead of 'debugfs_create_regset32' crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm crypto: tcrypt - Fix missing return value check crypto: hisilicon/sec - modify the hardware endian configuration crypto: hisilicon/sec - fix the abnormal exiting process crypto: qat - store vf.compatible flag crypto: qat - do not export adf_iov_putmsg() ...
-rw-r--r--arch/arm/crypto/curve25519-glue.c8
-rw-r--r--arch/arm64/crypto/Kconfig2
-rw-r--r--arch/arm64/crypto/sm4-ce-glue.c20
-rw-r--r--arch/x86/crypto/Makefile6
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c5
-rw-r--r--arch/x86/crypto/sm4-aesni-avx-asm_64.S589
-rw-r--r--arch/x86/crypto/sm4-aesni-avx2-asm_64.S497
-rw-r--r--arch/x86/crypto/sm4-avx.h24
-rw-r--r--arch/x86/crypto/sm4_aesni_avx2_glue.c169
-rw-r--r--arch/x86/crypto/sm4_aesni_avx_glue.c487
-rw-r--r--crypto/Kconfig44
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/ecc.h5
-rw-r--r--crypto/sha512_generic.c3
-rw-r--r--crypto/skcipher.c2
-rw-r--r--crypto/sm4_generic.c180
-rw-r--r--crypto/tcrypt.c100
-rw-r--r--crypto/testmgr.c29
-rw-r--r--crypto/testmgr.h148
-rw-r--r--crypto/wp512.c2
-rw-r--r--drivers/char/hw_random/Kconfig14
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/amd-rng.c8
-rw-r--r--drivers/char/hw_random/arm_smccc_trng.c123
-rw-r--r--drivers/char/hw_random/geode-rng.c8
-rw-r--r--drivers/char/hw_random/intel-rng.c8
-rw-r--r--drivers/char/hw_random/via-rng.c8
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c9
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c3
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c9
-rw-r--r--drivers/crypto/atmel-aes.c154
-rw-r--r--drivers/crypto/atmel-tdes.c66
-rw-r--r--drivers/crypto/ccp/sev-dev.c49
-rw-r--r--drivers/crypto/ccp/sp-pci.c19
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c123
-rw-r--r--drivers/crypto/hisilicon/qm.c430
-rw-r--r--drivers/crypto/hisilicon/qm.h8
-rw-r--r--drivers/crypto/hisilicon/sec2/sec.h5
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c138
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c83
-rw-r--r--drivers/crypto/mxs-dcp.c81
-rw-r--r--drivers/crypto/omap-aes.c8
-rw-r--r--drivers/crypto/omap-crypto.c2
-rw-r--r--drivers/crypto/omap-des.c8
-rw-r--r--drivers/crypto/omap-sham.c68
-rw-r--r--drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c8
-rw-r--r--drivers/crypto/qat/qat_4xxx/adf_drv.c14
-rw-r--r--drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c19
-rw-r--r--drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c3xxx/adf_drv.c21
-rw-r--r--drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c14
-rw-r--r--drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c3xxxvf/adf_drv.c16
-rw-r--r--drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c19
-rw-r--r--drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c62x/adf_drv.c21
-rw-r--r--drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c14
-rw-r--r--drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_c62xvf/adf_drv.c16
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_devices.h8
-rw-r--r--drivers/crypto/qat/qat_common/adf_aer.c2
-rw-r--r--drivers/crypto/qat/qat_common/adf_common_drv.h21
-rw-r--r--drivers/crypto/qat/qat_common/adf_init.c13
-rw-r--r--drivers/crypto/qat/qat_common/adf_isr.c42
-rw-r--r--drivers/crypto/qat/qat_common/adf_pf2vf_msg.c78
-rw-r--r--drivers/crypto/qat/qat_common/adf_pf2vf_msg.h2
-rw-r--r--drivers/crypto/qat/qat_common/adf_sriov.c8
-rw-r--r--drivers/crypto/qat/qat_common/adf_vf2pf_msg.c12
-rw-r--r--drivers/crypto/qat/qat_common/adf_vf_isr.c64
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c19
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_drv.c21
-rw-r--r--drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c14
-rw-r--r--drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h1
-rw-r--r--drivers/crypto/qat/qat_dh895xccvf/adf_drv.c16
-rw-r--r--drivers/crypto/virtio/virtio_crypto_core.c4
-rw-r--r--drivers/firmware/smccc/smccc.c17
-rw-r--r--include/crypto/sm4.h25
-rw-r--r--include/linux/padata.h3
-rw-r--r--kernel/padata.c35
-rw-r--r--lib/crypto/Kconfig3
-rw-r--r--lib/crypto/Makefile3
-rw-r--r--lib/crypto/blake2s.c8
-rw-r--r--lib/crypto/chacha20poly1305.c8
-rw-r--r--lib/crypto/curve25519.c8
-rw-r--r--lib/crypto/sm4.c176
-rw-r--r--lib/mpi/mpiutil.c2
87 files changed, 3695 insertions, 839 deletions
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
index 31eb75b6002f..9bdafd57888c 100644
--- a/arch/arm/crypto/curve25519-glue.c
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = {
.max_size = curve25519_max_size,
};
-static int __init mod_init(void)
+static int __init arm_curve25519_init(void)
{
if (elf_hwcap & HWCAP_NEON) {
static_branch_enable(&have_neon);
@@ -122,14 +122,14 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit arm_curve25519_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
crypto_unregister_kpp(&curve25519_alg);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(arm_curve25519_init);
+module_exit(arm_curve25519_exit);
MODULE_ALIAS_CRYPTO("curve25519");
MODULE_ALIAS_CRYPTO("curve25519-neon");
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index b8eb0453123d..55f19450091b 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
- select CRYPTO_SM4
+ select CRYPTO_LIB_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 2754c875d39c..9c93cfc4841b 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2");
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
+static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
- crypto_sm4_encrypt(tfm, out, in);
+ sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
@@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
- crypto_sm4_decrypt(tfm, out, in);
+ sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
@@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = {
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sm4_ctx),
+ .cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
- .cia_setkey = crypto_sm4_set_key,
+ .cia_setkey = sm4_ce_setkey,
.cia_encrypt = sm4_ce_encrypt,
.cia_decrypt = sm4_ce_decrypt
}
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index d0959e7b809f..f307c93fc90a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
+sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
+
+obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
+sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
+
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2144e54a6c89..0fc961bef299 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
+ if (!walk.nbytes)
+ return err;
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
@@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
skcipher_request_set_crypt(&subreq, req->src, req->dst,
blocks * AES_BLOCK_SIZE, req->iv);
req = &subreq;
+
err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
} else {
tail = 0;
}
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
new file mode 100644
index 000000000000..fa2c3f50aecb
--- /dev/null
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -0,0 +1,589 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+#define RX0 %xmm0
+#define RX1 %xmm1
+#define MASK_4BIT %xmm2
+#define RTMP0 %xmm3
+#define RTMP1 %xmm4
+#define RTMP2 %xmm5
+#define RTMP3 %xmm6
+#define RTMP4 %xmm7
+
+#define RA0 %xmm8
+#define RA1 %xmm9
+#define RA2 %xmm10
+#define RA3 %xmm11
+
+#define RB0 %xmm12
+#define RB1 %xmm13
+#define RB2 %xmm14
+#define RB3 %xmm15
+
+#define RNOT %xmm0
+#define RBSWAP %xmm1
+
+
+/* Transpose four 32-bit words between 128-bit vectors. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* pre-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * 'vaeslastenc' instruction.
+ */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+
+.section .rodata.cst164, "aM", @progbits, 164
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+ .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+ .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+ .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+ .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+ .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+ .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+ .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+ .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+ .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+ .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+
+.text
+.align 16
+
+/*
+ * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
+ * const u8 *src, int nblocks)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_crypt4)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..4 blocks)
+ * %rdx: src (1..4 blocks)
+ * %rcx: num blocks (1..4)
+ */
+ FRAME_BEGIN
+
+ vmovdqu 0*16(%rdx), RA0;
+ vmovdqa RA0, RA1;
+ vmovdqa RA0, RA2;
+ vmovdqa RA0, RA3;
+ cmpq $2, %rcx;
+ jb .Lblk4_load_input_done;
+ vmovdqu 1*16(%rdx), RA1;
+ je .Lblk4_load_input_done;
+ vmovdqu 2*16(%rdx), RA2;
+ cmpq $3, %rcx;
+ je .Lblk4_load_input_done;
+ vmovdqu 3*16(%rdx), RA3;
+
+.Lblk4_load_input_done:
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
+ vmovdqa .Lpre_tf_hi_s rRIP, RB0;
+ vmovdqa .Lpost_tf_lo_s rRIP, RB1;
+ vmovdqa .Lpost_tf_hi_s rRIP, RB2;
+ vmovdqa .Linv_shift_row rRIP, RB3;
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2;
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RB3, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP2, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP3, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP1, s0, s0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk4:
+ ROUND(0, RA0, RA1, RA2, RA3);
+ ROUND(1, RA1, RA2, RA3, RA0);
+ ROUND(2, RA2, RA3, RA0, RA1);
+ ROUND(3, RA3, RA0, RA1, RA2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk4;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vmovdqu RA0, 0*16(%rsi);
+ cmpq $2, %rcx;
+ jb .Lblk4_store_output_done;
+ vmovdqu RA1, 1*16(%rsi);
+ je .Lblk4_store_output_done;
+ vmovdqu RA2, 2*16(%rsi);
+ cmpq $3, %rcx;
+ je .Lblk4_store_output_done;
+ vmovdqu RA3, 3*16(%rsi);
+
+.Lblk4_store_output_done:
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_crypt4)
+
+.align 8
+SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * plaintext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * ciphertext blocks
+ */
+ FRAME_BEGIN
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \
+ vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \
+ vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vmovdqa .Linv_shift_row rRIP, RTMP4; \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ vaesenclast MASK_4BIT, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ vpxor RTMP1, s0, s0; \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ vpxor RTMP3, r0, r0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+ ret;
+SYM_FUNC_END(__sm4_crypt_blk8)
+
+/*
+ * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
+ * const u8 *src, int nblocks)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_crypt8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..8 blocks)
+ * %rdx: src (1..8 blocks)
+ * %rcx: num blocks (1..8)
+ */
+ FRAME_BEGIN
+
+ cmpq $5, %rcx;
+ jb sm4_aesni_avx_crypt4;
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqa RB0, RB1;
+ vmovdqa RB0, RB2;
+ vmovdqa RB0, RB3;
+ je .Lblk8_load_input_done;
+ vmovdqu (5 * 16)(%rdx), RB1;
+ cmpq $7, %rcx;
+ jb .Lblk8_load_input_done;
+ vmovdqu (6 * 16)(%rdx), RB2;
+ je .Lblk8_load_input_done;
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+.Lblk8_load_input_done:
+ call __sm4_crypt_blk8;
+
+ cmpq $6, %rcx;
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ jb .Lblk8_store_output_done;
+ vmovdqu RB1, (5 * 16)(%rsi);
+ je .Lblk8_store_output_done;
+ vmovdqu RB2, (6 * 16)(%rsi);
+ cmpq $7, %rcx;
+ je .Lblk8_store_output_done;
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+.Lblk8_store_output_done:
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_crypt8)
+
+/*
+ * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RA0;
+
+ vmovdqa .Lbswap128_mask rRIP, RBSWAP;
+ vpshufb RBSWAP, RA0, RTMP0; /* be => le */
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP2); /* +1 */
+ vpshufb RBSWAP, RTMP0, RA1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +2 */
+ vpshufb RBSWAP, RTMP0, RA2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +3 */
+ vpshufb RBSWAP, RTMP0, RA3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +4 */
+ vpshufb RBSWAP, RTMP0, RB0;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +5 */
+ vpshufb RBSWAP, RTMP0, RB1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +6 */
+ vpshufb RBSWAP, RTMP0, RB2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +7 */
+ vpshufb RBSWAP, RTMP0, RB3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +8 */
+ vpshufb RBSWAP, RTMP0, RTMP1;
+
+ /* store new IV */
+ vmovdqu RTMP1, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
+
+/*
+ * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqu (5 * 16)(%rdx), RB1;
+ vmovdqu (6 * 16)(%rdx), RB2;
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+ call __sm4_crypt_blk8;
+
+ vmovdqu (7 * 16)(%rdx), RNOT;
+ vpxor (%rcx), RA0, RA0;
+ vpxor (0 * 16)(%rdx), RA1, RA1;
+ vpxor (1 * 16)(%rdx), RA2, RA2;
+ vpxor (2 * 16)(%rdx), RA3, RA3;
+ vpxor (3 * 16)(%rdx), RB0, RB0;
+ vpxor (4 * 16)(%rdx), RB1, RB1;
+ vpxor (5 * 16)(%rdx), RB2, RB2;
+ vpxor (6 * 16)(%rdx), RB3, RB3;
+ vmovdqu RNOT, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
+
+/*
+ * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ /* Load input */
+ vmovdqu (%rcx), RA0;
+ vmovdqu 0 * 16(%rdx), RA1;
+ vmovdqu 1 * 16(%rdx), RA2;
+ vmovdqu 2 * 16(%rdx), RA3;
+ vmovdqu 3 * 16(%rdx), RB0;
+ vmovdqu 4 * 16(%rdx), RB1;
+ vmovdqu 5 * 16(%rdx), RB2;
+ vmovdqu 6 * 16(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu 7 * 16(%rdx), RNOT;
+ vmovdqu RNOT, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8)
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
new file mode 100644
index 000000000000..d2ffd7f76ee2
--- /dev/null
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+/* vector registers */
+#define RX0 %ymm0
+#define RX1 %ymm1
+#define MASK_4BIT %ymm2
+#define RTMP0 %ymm3
+#define RTMP1 %ymm4
+#define RTMP2 %ymm5
+#define RTMP3 %ymm6
+#define RTMP4 %ymm7
+
+#define RA0 %ymm8
+#define RA1 %ymm9
+#define RA2 %ymm10
+#define RA3 %ymm11
+
+#define RB0 %ymm12
+#define RB1 %ymm13
+#define RB2 %ymm14
+#define RB3 %ymm15
+
+#define RNOT %ymm0
+#define RBSWAP %ymm1
+
+#define RX0x %xmm0
+#define RX1x %xmm1
+#define MASK_4BITx %xmm2
+
+#define RNOTx %xmm0
+#define RBSWAPx %xmm1
+
+#define RTMP0x %xmm3
+#define RTMP1x %xmm4
+#define RTMP2x %xmm5
+#define RTMP3x %xmm6
+#define RTMP4x %xmm7
+
+
+/* helper macros */
+
+/* Transpose four 32-bit words between 128-bit vector lanes. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* post-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * 'vaeslastenc' instruction. */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+
+.section .rodata.cst164, "aM", @progbits, 164
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+ .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+ .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+ .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+ .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+ .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+ .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+ .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+ .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+ .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+ .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+.text
+.align 16
+
+.align 8
+SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * plaintext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * ciphertext blocks
+ */
+ FRAME_BEGIN
+
+ vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vpbroadcastd (4*(round))(%rdi), RX0; \
+ vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
+ vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
+ vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vextracti128 $1, RX0, RTMP4x; \
+ vextracti128 $1, RX1, RTMP0x; \
+ vaesenclast MASK_4BITx, RX0x, RX0x; \
+ vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
+ vaesenclast MASK_4BITx, RX1x, RX1x; \
+ vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
+ vinserti128 $1, RTMP4x, RX0, RX0; \
+ vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
+ vinserti128 $1, RTMP0x, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP1, s0, s0; \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP3, r0, r0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+ ret;
+SYM_FUNC_END(__sm4_crypt_blk16)
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+/*
+ * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ movq 8(%rcx), %rax;
+ bswapq %rax;
+
+ vzeroupper;
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
+ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RTMP4x;
+ vpshufb RTMP3x, RTMP4x, RTMP4x;
+ vmovdqa RTMP4x, RTMP0x;
+ inc_le128(RTMP4x, RNOTx, RTMP1x);
+ vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+ vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+ /* check need for handling 64-bit overflow and carry */
+ cmpq $(0xffffffffffffffff - 16), %rax;
+ ja .Lhandle_ctr_carry;
+
+ /* construct IVs */
+ vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+ vpshufb RTMP3, RTMP0, RA1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+ vpshufb RTMP3, RTMP0, RA2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+ vpshufb RTMP3, RTMP0, RA3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+ vpshufb RTMP3, RTMP0, RB0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+ vpshufb RTMP3, RTMP0, RB1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+ vpshufb RTMP3, RTMP0, RB2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+ vpshufb RTMP3, RTMP0, RB3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+ vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+ jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vextracti128 $1, RTMP0, RTMP0x;
+ vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+ /* store new IV */
+ vmovdqu RTMP0x, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
+
+/*
+ * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vzeroupper;
+
+ vmovdqu (0 * 32)(%rdx), RA0;
+ vmovdqu (1 * 32)(%rdx), RA1;
+ vmovdqu (2 * 32)(%rdx), RA2;
+ vmovdqu (3 * 32)(%rdx), RA3;
+ vmovdqu (4 * 32)(%rdx), RB0;
+ vmovdqu (5 * 32)(%rdx), RB1;
+ vmovdqu (6 * 32)(%rdx), RB2;
+ vmovdqu (7 * 32)(%rdx), RB3;
+
+ call __sm4_crypt_blk16;
+
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RNOT;
+ vpxor RNOT, RA0, RA0;
+ vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
+ vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
+ vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
+ vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
+ vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+ vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
+ vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
+
+/*
+ * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vzeroupper;
+
+ /* Load input */
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RA0;
+ vmovdqu (0 * 32 + 16)(%rdx), RA1;
+ vmovdqu (1 * 32 + 16)(%rdx), RA2;
+ vmovdqu (2 * 32 + 16)(%rdx), RA3;
+ vmovdqu (3 * 32 + 16)(%rdx), RB0;
+ vmovdqu (4 * 32 + 16)(%rdx), RB1;
+ vmovdqu (5 * 32 + 16)(%rdx), RB2;
+ vmovdqu (6 * 32 + 16)(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)
diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h
new file mode 100644
index 000000000000..1bceab7516aa
--- /dev/null
+++ b/arch/x86/crypto/sm4-avx.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef ASM_X86_SM4_AVX_H
+#define ASM_X86_SM4_AVX_H
+
+#include <linux/types.h>
+#include <crypto/sm4.h>
+
+typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv);
+
+int sm4_avx_ecb_encrypt(struct skcipher_request *req);
+int sm4_avx_ecb_decrypt(struct skcipher_request *req);
+
+int sm4_cbc_encrypt(struct skcipher_request *req);
+int sm4_avx_cbc_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func);
+
+int sm4_cfb_encrypt(struct skcipher_request *req);
+int sm4_avx_cfb_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func);
+
+int sm4_avx_ctr_crypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func);
+
+#endif
diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c
new file mode 100644
index 000000000000..84bc718f49a3
--- /dev/null
+++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (c) 2021, Alibaba Group.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-avx.h"
+
+#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
+
+asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+
+static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
+ sm4_aesni_avx2_cbc_dec_blk16);
+}
+
+
+static int cfb_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
+ sm4_aesni_avx2_cfb_dec_blk16);
+}
+
+static int ctr_crypt(struct skcipher_request *req)
+{
+ return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE,
+ sm4_aesni_avx2_ctr_enc_blk16);
+}
+
+static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
+ {
+ .base = {
+ .cra_name = "__ecb(sm4)",
+ .cra_driver_name = "__ecb-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_avx_ecb_encrypt,
+ .decrypt = sm4_avx_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cbc(sm4)",
+ .cra_driver_name = "__cbc-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cfb(sm4)",
+ .cra_driver_name = "__cfb-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__ctr(sm4)",
+ .cra_driver_name = "__ctr-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
+
+static struct simd_skcipher_alg *
+simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
+
+static int __init sm4_init(void)
+{
+ const char *feature_name;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+ pr_info("AVX2 or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+ &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
+ return -ENODEV;
+ }
+
+ return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx2_skciphers),
+ simd_sm4_aesni_avx2_skciphers);
+}
+
+static void __exit sm4_exit(void)
+{
+ simd_unregister_skciphers(sm4_aesni_avx2_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx2_skciphers),
+ simd_sm4_aesni_avx2_skciphers);
+}
+
+module_init(sm4_init);
+module_exit(sm4_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");
diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c
new file mode 100644
index 000000000000..7800f77d68ad
--- /dev/null
+++ b/arch/x86/crypto/sm4_aesni_avx_glue.c
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (c) 2021, Alibaba Group.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-avx.h"
+
+#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8)
+
+asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
+ const u8 *src, int nblocks);
+asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
+ const u8 *src, int nblocks);
+asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+
+static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
+static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
+{
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+ while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
+ sm4_aesni_avx_crypt8(rkey, dst, src, 8);
+ dst += SM4_CRYPT8_BLOCK_SIZE;
+ src += SM4_CRYPT8_BLOCK_SIZE;
+ nbytes -= SM4_CRYPT8_BLOCK_SIZE;
+ }
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ unsigned int nblocks = min(nbytes >> 4, 4u);
+ sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+ kernel_fpu_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+int sm4_avx_ecb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_crypt(req, ctx->rkey_enc);
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
+
+int sm4_avx_ecb_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_crypt(req, ctx->rkey_dec);
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
+
+int sm4_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *iv = walk.iv;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
+ sm4_crypt_block(ctx->rkey_enc, dst, dst);
+ iv = dst;
+ src += SM4_BLOCK_SIZE;
+ dst += SM4_BLOCK_SIZE;
+ nbytes -= SM4_BLOCK_SIZE;
+ }
+ if (iv != walk.iv)
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
+
+int sm4_avx_cbc_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+
+ while (nbytes >= bsize) {
+ func(ctx->rkey_dec, dst, src, walk.iv);
+ dst += bsize;
+ src += bsize;
+ nbytes -= bsize;
+ }
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ u8 iv[SM4_BLOCK_SIZE];
+ unsigned int nblocks = min(nbytes >> 4, 8u);
+ int i;
+
+ sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
+ src, nblocks);
+
+ src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
+ dst += (nblocks - 1) * SM4_BLOCK_SIZE;
+ memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
+
+ for (i = nblocks - 1; i > 0; i--) {
+ crypto_xor_cpy(dst, src,
+ &keystream[i * SM4_BLOCK_SIZE],
+ SM4_BLOCK_SIZE);
+ src -= SM4_BLOCK_SIZE;
+ dst -= SM4_BLOCK_SIZE;
+ }
+ crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += (nblocks + 1) * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_fpu_end();
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
+
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
+ sm4_aesni_avx_cbc_dec_blk8);
+}
+
+int sm4_cfb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+ const u8 *iv = walk.iv;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ sm4_crypt_block(ctx->rkey_enc, keystream, iv);
+ crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
+ iv = dst;
+ src += SM4_BLOCK_SIZE;
+ dst += SM4_BLOCK_SIZE;
+ nbytes -= SM4_BLOCK_SIZE;
+ }
+ if (iv != walk.iv)
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
+
+int sm4_avx_cfb_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+
+ while (nbytes >= bsize) {
+ func(ctx->rkey_enc, dst, src, walk.iv);
+ dst += bsize;
+ src += bsize;
+ nbytes -= bsize;
+ }
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ unsigned int nblocks = min(nbytes >> 4, 8u);
+
+ memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
+ if (nblocks > 1)
+ memcpy(&keystream[SM4_BLOCK_SIZE], src,
+ (nblocks - 1) * SM4_BLOCK_SIZE);
+ memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
+ SM4_BLOCK_SIZE);
+
+ sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
+ keystream, nblocks);
+
+ crypto_xor_cpy(dst, src, keystream,
+ nblocks * SM4_BLOCK_SIZE);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_fpu_end();
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
+
+static int cfb_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
+ sm4_aesni_avx_cfb_dec_blk8);
+}
+
+int sm4_avx_ctr_crypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+
+ while (nbytes >= bsize) {
+ func(ctx->rkey_enc, dst, src, walk.iv);
+ dst += bsize;
+ src += bsize;
+ nbytes -= bsize;
+ }
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ unsigned int nblocks = min(nbytes >> 4, 8u);
+ int i;
+
+ for (i = 0; i < nblocks; i++) {
+ memcpy(&keystream[i * SM4_BLOCK_SIZE],
+ walk.iv, SM4_BLOCK_SIZE);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ }
+ sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
+ keystream, nblocks);
+
+ crypto_xor_cpy(dst, src, keystream,
+ nblocks * SM4_BLOCK_SIZE);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_fpu_end();
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+
+ sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
+
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ dst += nbytes;
+ src += nbytes;
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
+
+static int ctr_crypt(struct skcipher_request *req)
+{
+ return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
+ sm4_aesni_avx_ctr_enc_blk8);
+}
+
+static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
+ {
+ .base = {
+ .cra_name = "__ecb(sm4)",
+ .cra_driver_name = "__ecb-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_avx_ecb_encrypt,
+ .decrypt = sm4_avx_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cbc(sm4)",
+ .cra_driver_name = "__cbc-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cfb(sm4)",
+ .cra_driver_name = "__cfb-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__ctr(sm4)",
+ .cra_driver_name = "__ctr-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
+
+static struct simd_skcipher_alg *
+simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
+
+static int __init sm4_init(void)
+{
+ const char *feature_name;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+ pr_info("AVX or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+ &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
+ return -ENODEV;
+ }
+
+ return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx_skciphers),
+ simd_sm4_aesni_avx_skciphers);
+}
+
+static void __exit sm4_exit(void)
+{
+ simd_unregister_skciphers(sm4_aesni_avx_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx_skciphers),
+ simd_sm4_aesni_avx_skciphers);
+}
+
+module_init(sm4_init);
+module_exit(sm4_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 64b772c5d1c9..536df4b6b825 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64
config CRYPTO_SM4
tristate "SM4 cipher algorithm"
select CRYPTO_ALGAPI
+ select CRYPTO_LIB_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
@@ -1569,6 +1570,49 @@ config CRYPTO_SM4
If unsure, say N.
+config CRYPTO_SM4_AESNI_AVX_X86_64
+ tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)"
+ depends on X86 && 64BIT
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SIMD
+ select CRYPTO_ALGAPI
+ select CRYPTO_LIB_SM4
+ help
+ SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
+
+ SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+ Organization of State Commercial Administration of China (OSCCA)
+ as an authorized cryptographic algorithms for the use within China.
+
+ This is SM4 optimized implementation using AES-NI/AVX/x86_64
+ instruction set for block cipher. Through two affine transforms,
+ we can use the AES S-Box to simulate the SM4 S-Box to achieve the
+ effect of instruction acceleration.
+
+ If unsure, say N.
+
+config CRYPTO_SM4_AESNI_AVX2_X86_64
+ tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)"
+ depends on X86 && 64BIT
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SIMD
+ select CRYPTO_ALGAPI
+ select CRYPTO_LIB_SM4
+ select CRYPTO_SM4_AESNI_AVX_X86_64
+ help
+ SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
+
+ SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+ Organization of State Commercial Administration of China (OSCCA)
+ as an authorized cryptographic algorithms for the use within China.
+
+ This is SM4 optimized implementation using AES-NI/AVX2/x86_64
+ instruction set for block cipher. Through two affine transforms,
+ we can use the AES S-Box to simulate the SM4 S-Box to achieve the
+ effect of instruction acceleration.
+
+ If unsure, say N.
+
config CRYPTO_TEA
tristate "TEA, XTEA and XETA cipher algorithms"
depends on CRYPTO_USER_API_ENABLE_OBSOLETE
diff --git a/crypto/Makefile b/crypto/Makefile
index 10526d4559b8..c633f15a0481 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o
obj-$(CONFIG_CRYPTO_MD4) += md4.o
obj-$(CONFIG_CRYPTO_MD5) += md5.o
obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o
-obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o
obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
diff --git a/crypto/ecc.h b/crypto/ecc.h
index a006132646a4..1350e8eb6ac2 100644
--- a/crypto/ecc.h
+++ b/crypto/ecc.h
@@ -27,6 +27,7 @@
#define _CRYPTO_ECC_H
#include <crypto/ecc_curve.h>
+#include <asm/unaligned.h>
/* One digit is u64 qword. */
#define ECC_CURVE_NIST_P192_DIGITS 3
@@ -46,13 +47,13 @@
* @out: Output array
* @ndigits: Number of digits to copy
*/
-static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits)
+static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits)
{
const __be64 *src = (__force __be64 *)in;
int i;
for (i = 0; i < ndigits; i++)
- out[i] = be64_to_cpu(src[ndigits - 1 - i]);
+ out[i] = get_unaligned_be64(&src[ndigits - 1 - i]);
}
/**
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index c72d72ad828e..be70e76d6d86 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input)
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-
- /* erase our data */
- a = b = c = d = e = f = g = h = t1 = t2 = 0;
}
static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index a15376245416..418211180cee 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
static int skcipher_walk_first(struct skcipher_walk *walk)
{
- if (WARN_ON_ONCE(in_irq()))
+ if (WARN_ON_ONCE(in_hardirq()))
return -EDEADLK;
walk->buffer = NULL;
diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c
index 016dbc595705..4a6480a27fee 100644
--- a/crypto/sm4_generic.c
+++ b/crypto/sm4_generic.c
@@ -16,191 +16,43 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-static const u32 fk[4] = {
- 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
-};
-
-static const u8 sbox[256] = {
- 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
- 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
- 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
- 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
- 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
- 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
- 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
- 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
- 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
- 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
- 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
- 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
- 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
- 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
- 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
- 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
- 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
- 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
- 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
- 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
- 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
- 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
- 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
- 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
- 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
- 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
- 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
- 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
- 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
- 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
- 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
- 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
-};
-
-static const u32 ck[] = {
- 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
- 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
- 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
- 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
- 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
- 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
- 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
- 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
-};
-
-static u32 sm4_t_non_lin_sub(u32 x)
-{
- int i;
- u8 *b = (u8 *)&x;
-
- for (i = 0; i < 4; ++i)
- b[i] = sbox[b[i]];
-
- return x;
-}
-
-static u32 sm4_key_lin_sub(u32 x)
-{
- return x ^ rol32(x, 13) ^ rol32(x, 23);
-
-}
-
-static u32 sm4_enc_lin_sub(u32 x)
-{
- return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
-}
-
-static u32 sm4_key_sub(u32 x)
-{
- return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
-}
-
-static u32 sm4_enc_sub(u32 x)
-{
- return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
-}
-
-static u32 sm4_round(const u32 *x, const u32 rk)
-{
- return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
-}
-
-
/**
- * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
- * @ctx: The location where the computed key will be stored.
- * @in_key: The supplied key.
- * @key_len: The length of the supplied key.
- *
- * Returns 0 on success. The function fails only if an invalid key size (or
- * pointer) is supplied.
- */
-int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
- unsigned int key_len)
-{
- u32 rk[4], t;
- const u32 *key = (u32 *)in_key;
- int i;
-
- if (key_len != SM4_KEY_SIZE)
- return -EINVAL;
-
- for (i = 0; i < 4; ++i)
- rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
-
- for (i = 0; i < 32; ++i) {
- t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
- ctx->rkey_enc[i] = t;
- rk[0] = rk[1];
- rk[1] = rk[2];
- rk[2] = rk[3];
- rk[3] = t;
- }
-
- for (i = 0; i < 32; ++i)
- ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
-
-/**
- * crypto_sm4_set_key - Set the SM4 key.
+ * sm4_setkey - Set the SM4 key.
* @tfm: The %crypto_tfm that is used in the context.
* @in_key: The input key.
* @key_len: The size of the key.
*
- * This function uses crypto_sm4_expand_key() to expand the key.
- * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
+ * This function uses sm4_expandkey() to expand the key.
+ * &sm4_ctx _must_ be the private data embedded in @tfm which is
* retrieved with crypto_tfm_ctx().
*
* Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
*/
-int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
-
- return crypto_sm4_expand_key(ctx, in_key, key_len);
-}
-EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
-
-static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
-{
- u32 x[4], i, t;
-
- for (i = 0; i < 4; ++i)
- x[i] = get_unaligned_be32(&in[i]);
-
- for (i = 0; i < 32; ++i) {
- t = sm4_round(x, rk[i]);
- x[0] = x[1];
- x[1] = x[2];
- x[2] = x[3];
- x[3] = t;
- }
+ struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- for (i = 0; i < 4; ++i)
- put_unaligned_be32(x[3 - i], &out[i]);
+ return sm4_expandkey(ctx, in_key, key_len);
}
/* encrypt a block of text */
-void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
+ sm4_crypt_block(ctx->rkey_enc, out, in);
}
-EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
/* decrypt a block of text */
-void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
+ sm4_crypt_block(ctx->rkey_dec, out, in);
}
-EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
static struct crypto_alg sm4_alg = {
.cra_name = "sm4",
@@ -208,15 +60,15 @@ static struct crypto_alg sm4_alg = {
.cra_priority = 100,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sm4_ctx),
+ .cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
- .cia_setkey = crypto_sm4_set_key,
- .cia_encrypt = crypto_sm4_encrypt,
- .cia_decrypt = crypto_sm4_decrypt
+ .cia_setkey = sm4_setkey,
+ .cia_encrypt = sm4_encrypt,
+ .cia_decrypt = sm4_decrypt
}
}
};
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index f8d06da78e4f..82b0400985a5 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -77,7 +77,7 @@ static const char *check[] = {
NULL
};
-static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 };
+static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 };
static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 };
#define XBUFSIZE 8
@@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
}
ret = crypto_aead_setauthsize(tfm, authsize);
+ if (ret) {
+ pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
+ ret);
+ goto out_free_tfm;
+ }
for (i = 0; i < num_mb; ++i)
if (testmgr_alloc_buf(data[i].xbuf)) {
@@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
for (i = 0; i < num_mb; ++i) {
data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
if (!data[i].req) {
- pr_err("alg: skcipher: Failed to allocate request for %s\n",
+ pr_err("alg: aead: Failed to allocate request for %s\n",
algo);
while (i--)
aead_request_free(data[i].req);
@@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
sgout = &sg[9];
tfm = crypto_alloc_aead(algo, 0, 0);
-
if (IS_ERR(tfm)) {
pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
PTR_ERR(tfm));
goto out_notfm;
}
+ ret = crypto_aead_setauthsize(tfm, authsize);
+ if (ret) {
+ pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
+ ret);
+ goto out_noreq;
+ }
+
crypto_init_wait(&wait);
printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
@@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
break;
}
}
+
ret = crypto_aead_setkey(tfm, key, *keysize);
- ret = crypto_aead_setauthsize(tfm, authsize);
+ if (ret) {
+ pr_err("setkey() failed flags=%x: %d\n",
+ crypto_aead_get_flags(tfm), ret);
+ goto out;
+ }
iv_len = crypto_aead_ivsize(tfm);
if (iv_len)
@@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
i, *keysize * 8, bs);
-
memset(tvmem[0], 0xff, PAGE_SIZE);
- if (ret) {
- pr_err("setkey() failed flags=%x\n",
- crypto_aead_get_flags(tfm));
- goto out;
- }
-
sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize),
assoc, aad_size);
@@ -1907,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("streebog512");
break;
+ case 55:
+ ret += tcrypt_test("gcm(sm4)");
+ break;
+
+ case 56:
+ ret += tcrypt_test("ccm(sm4)");
+ break;
+
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@@ -1998,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 157:
ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
break;
+
+ case 158:
+ ret += tcrypt_test("cbcmac(sm4)");
+ break;
+
+ case 159:
+ ret += tcrypt_test("cmac(sm4)");
+ break;
+
case 181:
ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
break;
@@ -2031,6 +2057,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
case 191:
ret += tcrypt_test("ecb(sm4)");
ret += tcrypt_test("cbc(sm4)");
+ ret += tcrypt_test("cfb(sm4)");
ret += tcrypt_test("ctr(sm4)");
break;
case 200:
@@ -2289,6 +2316,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
+ test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
@@ -2322,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
NULL, 0, 16, 8, speed_template_16);
break;
+ case 222:
+ test_aead_speed("gcm(sm4)", ENCRYPT, sec,
+ NULL, 0, 16, 8, speed_template_16);
+ test_aead_speed("gcm(sm4)", DECRYPT, sec,
+ NULL, 0, 16, 8, speed_template_16);
+ break;
+
+ case 223:
+ test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec,
+ NULL, 0, 16, 16, aead_speed_template_19);
+ test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec,
+ NULL, 0, 16, 16, aead_speed_template_19);
+ break;
+
+ case 224:
+ test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8,
+ speed_template_16, num_mb);
+ test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8,
+ speed_template_16, num_mb);
+ break;
+
+ case 225:
+ test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0,
+ 16, 16, aead_speed_template_19, num_mb);
+ test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0,
+ 16, 16, aead_speed_template_19, num_mb);
+ break;
+
case 300:
if (alg) {
test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -2757,6 +2816,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_8_32);
break;
+ case 518:
+ test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_16);
+ break;
+
case 600:
test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32, num_mb);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index c978e41f11a1..70f69f0910c9 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4451,6 +4451,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(aes_cbcmac_tv_template)
}
}, {
+ .alg = "cbcmac(sm4)",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(sm4_cbcmac_tv_template)
+ }
+ }, {
.alg = "ccm(aes)",
.generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))",
.test = alg_test_aead,
@@ -4462,6 +4468,16 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+ .alg = "ccm(sm4)",
+ .generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))",
+ .test = alg_test_aead,
+ .suite = {
+ .aead = {
+ ____VECS(sm4_ccm_tv_template),
+ .einval_allowed = 1,
+ }
+ }
+ }, {
.alg = "cfb(aes)",
.test = alg_test_skcipher,
.fips_allowed = 1,
@@ -4495,6 +4511,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(des3_ede_cmac64_tv_template)
}
}, {
+ .alg = "cmac(sm4)",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(sm4_cmac128_tv_template)
+ }
+ }, {
.alg = "compress_null",
.test = alg_test_null,
}, {
@@ -4968,6 +4990,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.aead = __VECS(aes_gcm_tv_template)
}
}, {
+ .alg = "gcm(sm4)",
+ .generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)",
+ .test = alg_test_aead,
+ .suite = {
+ .aead = __VECS(sm4_gcm_tv_template)
+ }
+ }, {
.alg = "ghash",
.test = alg_test_hash,
.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 3ed6ab34ab51..e6fca34b5b25 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
}
};
+static const struct aead_testvec sm4_gcm_tv_template[] = {
+ { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
+ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+ "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+ .klen = 16,
+ .iv = "\x00\x00\x12\x34\x56\x78\x00\x00"
+ "\x00\x00\xAB\xCD",
+ .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
+ "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
+ "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
+ "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
+ .plen = 64,
+ .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xAB\xAD\xDA\xD2",
+ .alen = 20,
+ .ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE"
+ "\x19\xD0\xDC\x99\x69\xC4\xBB\x7D"
+ "\x5F\xD4\x6F\xD3\x75\x64\x89\x06"
+ "\x91\x57\xB2\x82\xBB\x20\x07\x35"
+ "\xD8\x27\x10\xCA\x5C\x22\xF0\xCC"
+ "\xFA\x7C\xBF\x93\xD4\x96\xAC\x15"
+ "\xA5\x68\x34\xCB\xCF\x98\xC3\x97"
+ "\xB4\x02\x4A\x26\x91\x23\x3B\x8D"
+ "\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
+ "\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
+ .clen = 80,
+ }
+};
+
+static const struct aead_testvec sm4_ccm_tv_template[] = {
+ { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */
+ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+ "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+ .klen = 16,
+ .iv = "\x02\x00\x00\x12\x34\x56\x78\x00"
+ "\x00\x00\x00\xAB\xCD\x00\x00\x00",
+ .ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
+ "\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
+ "\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
+ "\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+ "\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
+ "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
+ .plen = 64,
+ .assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
+ "\xAB\xAD\xDA\xD2",
+ .alen = 20,
+ .ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB"
+ "\xCD\x41\x4C\xCE\x60\x34\xD8\x95"
+ "\xDD\xA1\xBF\x8F\x13\x2F\x04\x20"
+ "\x98\x66\x15\x72\xE7\x48\x30\x94"
+ "\xFD\x12\xE5\x18\xCE\x06\x2C\x98"
+ "\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B"
+ "\xED\x31\xA2\xF0\x44\x76\xC1\x8B"
+ "\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B"
+ "\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
+ "\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
+ .clen = 80,
+ }
+};
+
+static const struct hash_testvec sm4_cbcmac_tv_template[] = {
+ {
+ .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
+ "\x77\x66\x55\x44\x33\x22\x11\x00",
+ .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+ .digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f"
+ "\x86\x81\x0e\x0e\xea\x14\x6d\x73",
+ .psize = 16,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xee",
+ .digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22"
+ "\xa3\x39\x3a\x31\x88\x91\x49\xa1",
+ .psize = 33,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
+ "\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
+ "\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
+ "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
+ "\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
+ "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
+ "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
+ "\xfd\xdb\xb1\x9b\x76\x5c\x37",
+ .digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12"
+ "\x64\x0a\x66\x7f\x4e\x25\xca\xd0",
+ .psize = 63,
+ .ksize = 16,
+ }
+};
+
+static const struct hash_testvec sm4_cmac128_tv_template[] = {
+ {
+ .key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
+ "\x77\x66\x55\x44\x33\x22\x11\x00",
+ .plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+ .digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2"
+ "\x74\xa9\x00\x55\x13\x54\x2a\xd1",
+ .psize = 16,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
+ "\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
+ "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+ "\xee",
+ .digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85"
+ "\x21\x57\x02\x10\x1a\xbf\x9c\xc6",
+ .psize = 33,
+ .ksize = 16,
+ }, {
+ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+ "\xfe\xdc\xBA\x98\x76\x54\x32\x10",
+ .plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
+ "\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
+ "\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
+ "\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
+ "\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
+ "\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
+ "\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
+ "\xfd\xdb\xb1\x9b\x76\x5c\x37",
+ .digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0"
+ "\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc",
+ .psize = 63,
+ .ksize = 16,
+ }
+};
+
/* Cast6 test vectors from RFC 2612 */
static const struct cipher_testvec cast6_tv_template[] = {
{
diff --git a/crypto/wp512.c b/crypto/wp512.c
index bf79fbb2340f..5e820afa3c78 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
0xca2dbf07ad5a8333ULL,
};
-/**
+/*
* The core Whirlpool transform.
*/
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 3f166c8a4099..239eca4d6805 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -524,6 +524,20 @@ config HW_RANDOM_XIPHERA
To compile this driver as a module, choose M here: the
module will be called xiphera-trng.
+config HW_RANDOM_ARM_SMCCC_TRNG
+ tristate "Arm SMCCC TRNG firmware interface support"
+ depends on HAVE_ARM_SMCCC_DISCOVERY
+ default HW_RANDOM
+ help
+ Say 'Y' to enable the True Random Number Generator driver using
+ the Arm SMCCC TRNG firmware interface. This reads entropy from
+ higher exception levels (firmware, hypervisor). Uses SMCCC for
+ communicating with the firmware:
+ https://developer.arm.com/documentation/den0098/latest/
+
+ To compile this driver as a module, choose M here: the
+ module will be called arm_smccc_trng.
+
endif # HW_RANDOM
config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 8933fada74f2..a5a1c765a394 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -45,3 +45,4 @@ obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o
obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o
obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o
obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o
+obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o
diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
index d8d4ef5214a1..c22d4184bb61 100644
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -124,7 +124,7 @@ static struct hwrng amd_rng = {
.read = amd_rng_read,
};
-static int __init mod_init(void)
+static int __init amd_rng_mod_init(void)
{
int err;
struct pci_dev *pdev = NULL;
@@ -188,7 +188,7 @@ out:
return err;
}
-static void __exit mod_exit(void)
+static void __exit amd_rng_mod_exit(void)
{
struct amd768_priv *priv;
@@ -203,8 +203,8 @@ static void __exit mod_exit(void)
kfree(priv);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(amd_rng_mod_init);
+module_exit(amd_rng_mod_exit);
MODULE_AUTHOR("The Linux Kernel team");
MODULE_DESCRIPTION("H/W RNG driver for AMD chipsets");
diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c
new file mode 100644
index 000000000000..b24ac39a903b
--- /dev/null
+++ b/drivers/char/hw_random/arm_smccc_trng.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Randomness driver for the ARM SMCCC TRNG Firmware Interface
+ * https://developer.arm.com/documentation/den0098/latest/
+ *
+ * Copyright (C) 2020 Arm Ltd.
+ *
+ * The ARM TRNG firmware interface specifies a protocol to read entropy
+ * from a higher exception level, to abstract from any machine specific
+ * implemenations and allow easier use in hypervisors.
+ *
+ * The firmware interface is realised using the SMCCC specification.
+ */
+
+#include <linux/bits.h>
+#include <linux/device.h>
+#include <linux/hw_random.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/arm-smccc.h>
+
+#ifdef CONFIG_ARM64
+#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND64
+#define MAX_BITS_PER_CALL (3 * 64UL)
+#else
+#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND32
+#define MAX_BITS_PER_CALL (3 * 32UL)
+#endif
+
+/* We don't want to allow the firmware to stall us forever. */
+#define SMCCC_TRNG_MAX_TRIES 20
+
+#define SMCCC_RET_TRNG_INVALID_PARAMETER -2
+#define SMCCC_RET_TRNG_NO_ENTROPY -3
+
+static int copy_from_registers(char *buf, struct arm_smccc_res *res,
+ size_t bytes)
+{
+ unsigned int chunk, copied;
+
+ if (bytes == 0)
+ return 0;
+
+ chunk = min(bytes, sizeof(long));
+ memcpy(buf, &res->a3, chunk);
+ copied = chunk;
+ if (copied >= bytes)
+ return copied;
+
+ chunk = min((bytes - copied), sizeof(long));
+ memcpy(&buf[copied], &res->a2, chunk);
+ copied += chunk;
+ if (copied >= bytes)
+ return copied;
+
+ chunk = min((bytes - copied), sizeof(long));
+ memcpy(&buf[copied], &res->a1, chunk);
+
+ return copied + chunk;
+}
+
+static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+ struct arm_smccc_res res;
+ u8 *buf = data;
+ unsigned int copied = 0;
+ int tries = 0;
+
+ while (copied < max) {
+ size_t bits = min_t(size_t, (max - copied) * BITS_PER_BYTE,
+ MAX_BITS_PER_CALL);
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND, bits, &res);
+ if ((int)res.a0 < 0)
+ return (int)res.a0;
+
+ switch ((int)res.a0) {
+ case SMCCC_RET_SUCCESS:
+ copied += copy_from_registers(buf + copied, &res,
+ bits / BITS_PER_BYTE);
+ tries = 0;
+ break;
+ case SMCCC_RET_TRNG_NO_ENTROPY:
+ if (!wait)
+ return copied;
+ tries++;
+ if (tries >= SMCCC_TRNG_MAX_TRIES)
+ return copied;
+ cond_resched();
+ break;
+ }
+ }
+
+ return copied;
+}
+
+static int smccc_trng_probe(struct platform_device *pdev)
+{
+ struct hwrng *trng;
+
+ trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
+ if (!trng)
+ return -ENOMEM;
+
+ trng->name = "smccc_trng";
+ trng->read = smccc_trng_read;
+
+ platform_set_drvdata(pdev, trng);
+
+ return devm_hwrng_register(&pdev->dev, trng);
+}
+
+static struct platform_driver smccc_trng_driver = {
+ .driver = {
+ .name = "smccc_trng",
+ },
+ .probe = smccc_trng_probe,
+};
+module_platform_driver(smccc_trng_driver);
+
+MODULE_ALIAS("platform:smccc_trng");
+MODULE_AUTHOR("Andre Przywara");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c
index e1d421a36a13..138ce434f86b 100644
--- a/drivers/char/hw_random/geode-rng.c
+++ b/drivers/char/hw_random/geode-rng.c
@@ -83,7 +83,7 @@ static struct hwrng geode_rng = {
};
-static int __init mod_init(void)
+static int __init geode_rng_init(void)
{
int err = -ENODEV;
struct pci_dev *pdev = NULL;
@@ -124,7 +124,7 @@ err_unmap:
goto out;
}
-static void __exit mod_exit(void)
+static void __exit geode_rng_exit(void)
{
void __iomem *mem = (void __iomem *)geode_rng.priv;
@@ -132,8 +132,8 @@ static void __exit mod_exit(void)
iounmap(mem);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(geode_rng_init);
+module_exit(geode_rng_exit);
MODULE_DESCRIPTION("H/W RNG driver for AMD Geode LX CPUs");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c
index d740b8814bf3..7b171cb3b825 100644
--- a/drivers/char/hw_random/intel-rng.c
+++ b/drivers/char/hw_random/intel-rng.c
@@ -325,7 +325,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n";
}
-static int __init mod_init(void)
+static int __init intel_rng_mod_init(void)
{
int err = -ENODEV;
int i;
@@ -403,7 +403,7 @@ out:
}
-static void __exit mod_exit(void)
+static void __exit intel_rng_mod_exit(void)
{
void __iomem *mem = (void __iomem *)intel_rng.priv;
@@ -411,8 +411,8 @@ static void __exit mod_exit(void)
iounmap(mem);
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(intel_rng_mod_init);
+module_exit(intel_rng_mod_exit);
MODULE_DESCRIPTION("H/W RNG driver for Intel chipsets");
MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 39943bc3651a..7444cc146e86 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -192,7 +192,7 @@ static struct hwrng via_rng = {
};
-static int __init mod_init(void)
+static int __init via_rng_mod_init(void)
{
int err;
@@ -209,13 +209,13 @@ static int __init mod_init(void)
out:
return err;
}
-module_init(mod_init);
+module_init(via_rng_mod_init);
-static void __exit mod_exit(void)
+static void __exit via_rng_mod_exit(void)
{
hwrng_unregister(&via_rng);
}
-module_exit(mod_exit);
+module_exit(via_rng_mod_exit);
static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = {
X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL),
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
index cd1baee424a1..b3a9bbfb8831 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
@@ -26,8 +26,7 @@ void sun8i_ce_prng_exit(struct crypto_tfm *tfm)
{
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->seed = NULL;
ctx->slen = 0;
}
@@ -38,8 +37,7 @@ int sun8i_ce_prng_seed(struct crypto_rng *tfm, const u8 *seed,
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
if (ctx->seed && ctx->slen != slen) {
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->slen = 0;
ctx->seed = NULL;
}
@@ -157,9 +155,8 @@ err_dst:
memcpy(dst, d, dlen);
memcpy(ctx->seed, d + dlen, ctx->slen);
}
- memzero_explicit(d, todo);
err_iv:
- kfree(d);
+ kfree_sensitive(d);
err_mem:
return err;
}
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
index 5b7af4498bd5..19cd2e52f89d 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
@@ -95,9 +95,8 @@ err_pm:
memcpy(data, d, max);
err = max;
}
- memzero_explicit(d, todo);
err_dst:
- kfree(d);
+ kfree_sensitive(d);
return err;
}
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
index 3191527928e4..246a6782674c 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
@@ -20,8 +20,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed,
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
if (ctx->seed && ctx->slen != slen) {
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->slen = 0;
ctx->seed = NULL;
}
@@ -48,8 +47,7 @@ void sun8i_ss_prng_exit(struct crypto_tfm *tfm)
{
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
- memzero_explicit(ctx->seed, ctx->slen);
- kfree(ctx->seed);
+ kfree_sensitive(ctx->seed);
ctx->seed = NULL;
ctx->slen = 0;
}
@@ -167,9 +165,8 @@ err_iv:
/* Update seed */
memcpy(ctx->seed, d + dlen, ctx->slen);
}
- memzero_explicit(d, todo);
err_free:
- kfree(d);
+ kfree_sensitive(d);
return err;
}
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index b1d286004295..9391ccc03382 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -143,6 +143,7 @@ struct atmel_aes_xts_ctx {
struct atmel_aes_base_ctx base;
u32 key2[AES_KEYSIZE_256 / sizeof(u32)];
+ struct crypto_skcipher *fallback_tfm;
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@@ -155,6 +156,7 @@ struct atmel_aes_authenc_ctx {
struct atmel_aes_reqctx {
unsigned long mode;
u8 lastc[AES_BLOCK_SIZE];
+ struct skcipher_request fallback_req;
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@@ -418,24 +420,15 @@ static inline size_t atmel_aes_padlen(size_t len, size_t block_size)
return len ? block_size - len : 0;
}
-static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_base_ctx *ctx)
+static struct atmel_aes_dev *atmel_aes_dev_alloc(struct atmel_aes_base_ctx *ctx)
{
- struct atmel_aes_dev *aes_dd = NULL;
- struct atmel_aes_dev *tmp;
+ struct atmel_aes_dev *aes_dd;
spin_lock_bh(&atmel_aes.lock);
- if (!ctx->dd) {
- list_for_each_entry(tmp, &atmel_aes.dev_list, list) {
- aes_dd = tmp;
- break;
- }
- ctx->dd = aes_dd;
- } else {
- aes_dd = ctx->dd;
- }
-
+ /* One AES IP per SoC. */
+ aes_dd = list_first_entry_or_null(&atmel_aes.dev_list,
+ struct atmel_aes_dev, list);
spin_unlock_bh(&atmel_aes.lock);
-
return aes_dd;
}
@@ -967,7 +960,6 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
ctx = crypto_tfm_ctx(areq->tfm);
dd->areq = areq;
- dd->ctx = ctx;
start_async = (areq != new_areq);
dd->is_async = start_async;
@@ -1083,12 +1075,48 @@ static int atmel_aes_ctr_start(struct atmel_aes_dev *dd)
return atmel_aes_ctr_transfer(dd);
}
+static int atmel_aes_xts_fallback(struct skcipher_request *req, bool enc)
+{
+ struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
+ struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(
+ crypto_skcipher_reqtfm(req));
+
+ skcipher_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ skcipher_request_set_callback(&rctx->fallback_req, req->base.flags,
+ req->base.complete, req->base.data);
+ skcipher_request_set_crypt(&rctx->fallback_req, req->src, req->dst,
+ req->cryptlen, req->iv);
+
+ return enc ? crypto_skcipher_encrypt(&rctx->fallback_req) :
+ crypto_skcipher_decrypt(&rctx->fallback_req);
+}
+
static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
{
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct atmel_aes_reqctx *rctx;
- struct atmel_aes_dev *dd;
+ u32 opmode = mode & AES_FLAGS_OPMODE_MASK;
+
+ if (opmode == AES_FLAGS_XTS) {
+ if (req->cryptlen < XTS_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(req->cryptlen, XTS_BLOCK_SIZE))
+ return atmel_aes_xts_fallback(req,
+ mode & AES_FLAGS_ENCRYPT);
+ }
+
+ /*
+ * ECB, CBC, CFB, OFB or CTR mode require the plaintext and ciphertext
+ * to have a positve integer length.
+ */
+ if (!req->cryptlen && opmode != AES_FLAGS_XTS)
+ return 0;
+
+ if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) &&
+ !IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher)))
+ return -EINVAL;
switch (mode & AES_FLAGS_OPMODE_MASK) {
case AES_FLAGS_CFB8:
@@ -1113,14 +1141,10 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
}
ctx->is_aead = false;
- dd = atmel_aes_find_dev(ctx);
- if (!dd)
- return -ENODEV;
-
rctx = skcipher_request_ctx(req);
rctx->mode = mode;
- if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB &&
+ if (opmode != AES_FLAGS_ECB &&
!(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) {
unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
@@ -1130,7 +1154,7 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
ivsize, 0);
}
- return atmel_aes_handle_queue(dd, &req->base);
+ return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -1242,8 +1266,15 @@ static int atmel_aes_ctr_decrypt(struct skcipher_request *req)
static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_start;
return 0;
@@ -1252,8 +1283,15 @@ static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_ctr_start;
return 0;
@@ -1290,7 +1328,7 @@ static struct skcipher_alg aes_algs[] = {
{
.base.cra_name = "ofb(aes)",
.base.cra_driver_name = "atmel-ofb-aes",
- .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct atmel_aes_ctx),
.init = atmel_aes_init_tfm,
@@ -1691,20 +1729,15 @@ static int atmel_aes_gcm_crypt(struct aead_request *req,
{
struct atmel_aes_base_ctx *ctx;
struct atmel_aes_reqctx *rctx;
- struct atmel_aes_dev *dd;
ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
ctx->block_size = AES_BLOCK_SIZE;
ctx->is_aead = true;
- dd = atmel_aes_find_dev(ctx);
- if (!dd)
- return -ENODEV;
-
rctx = aead_request_ctx(req);
rctx->mode = AES_FLAGS_GCM | mode;
- return atmel_aes_handle_queue(dd, &req->base);
+ return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1742,8 +1775,15 @@ static int atmel_aes_gcm_decrypt(struct aead_request *req)
static int atmel_aes_gcm_init(struct crypto_aead *tfm)
{
struct atmel_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
crypto_aead_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_gcm_start;
return 0;
@@ -1819,12 +1859,8 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
* the order of the ciphered tweak bytes need to be reversed before
* writing them into the ODATARx registers.
*/
- for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
- u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
-
- tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
- tweak_bytes[i] = tmp;
- }
+ for (i = 0; i < AES_BLOCK_SIZE/2; ++i)
+ swap(tweak_bytes[i], tweak_bytes[AES_BLOCK_SIZE - 1 - i]);
/* Process the data. */
atmel_aes_write_ctrl(dd, use_dma, NULL);
@@ -1849,6 +1885,13 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (err)
return err;
+ crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
+ if (err)
+ return err;
+
memcpy(ctx->base.key, key, keylen/2);
memcpy(ctx->key2, key + keylen/2, keylen/2);
ctx->base.keylen = keylen/2;
@@ -1869,18 +1912,40 @@ static int atmel_aes_xts_decrypt(struct skcipher_request *req)
static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct atmel_aes_dev *dd;
+ const char *tfm_name = crypto_tfm_alg_name(&tfm->base);
- crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
+
+ ctx->fallback_tfm = crypto_alloc_skcipher(tfm_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->fallback_tfm))
+ return PTR_ERR(ctx->fallback_tfm);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) +
+ crypto_skcipher_reqsize(ctx->fallback_tfm));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_xts_start;
return 0;
}
+static void atmel_aes_xts_exit_tfm(struct crypto_skcipher *tfm)
+{
+ struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(ctx->fallback_tfm);
+}
+
static struct skcipher_alg aes_xts_alg = {
.base.cra_name = "xts(aes)",
.base.cra_driver_name = "atmel-xts-aes",
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct atmel_aes_xts_ctx),
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
@@ -1889,6 +1954,7 @@ static struct skcipher_alg aes_xts_alg = {
.encrypt = atmel_aes_xts_encrypt,
.decrypt = atmel_aes_xts_decrypt,
.init = atmel_aes_xts_init_tfm,
+ .exit = atmel_aes_xts_exit_tfm,
};
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
@@ -2075,6 +2141,11 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
{
struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm);
unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize();
+ struct atmel_aes_dev *dd;
+
+ dd = atmel_aes_dev_alloc(&ctx->base);
+ if (!dd)
+ return -ENODEV;
ctx->auth = atmel_sha_authenc_spawn(auth_mode);
if (IS_ERR(ctx->auth))
@@ -2082,6 +2153,8 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) +
auth_reqsize));
+ ctx->base.dd = dd;
+ ctx->base.dd->ctx = &ctx->base;
ctx->base.start = atmel_aes_authenc_start;
return 0;
@@ -2127,7 +2200,6 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm);
u32 authsize = crypto_aead_authsize(tfm);
bool enc = (mode & AES_FLAGS_ENCRYPT);
- struct atmel_aes_dev *dd;
/* Compute text length. */
if (!enc && req->cryptlen < authsize)
@@ -2146,11 +2218,7 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
ctx->block_size = AES_BLOCK_SIZE;
ctx->is_aead = true;
- dd = atmel_aes_find_dev(ctx);
- if (!dd)
- return -ENODEV;
-
- return atmel_aes_handle_queue(dd, &req->base);
+ return atmel_aes_handle_queue(ctx->dd, &req->base);
}
static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req)
@@ -2358,7 +2426,7 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
static void atmel_aes_crypto_alg_init(struct crypto_alg *alg)
{
- alg->cra_flags = CRYPTO_ALG_ASYNC;
+ alg->cra_flags |= CRYPTO_ALG_ASYNC;
alg->cra_alignmask = 0xf;
alg->cra_priority = ATMEL_AES_PRIORITY;
alg->cra_module = THIS_MODULE;
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 6f01c51e3c37..e30786ec9f2d 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -196,23 +196,15 @@ static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset,
atmel_tdes_write(dd, offset, *value);
}
-static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx)
+static struct atmel_tdes_dev *atmel_tdes_dev_alloc(void)
{
- struct atmel_tdes_dev *tdes_dd = NULL;
- struct atmel_tdes_dev *tmp;
+ struct atmel_tdes_dev *tdes_dd;
spin_lock_bh(&atmel_tdes.lock);
- if (!ctx->dd) {
- list_for_each_entry(tmp, &atmel_tdes.dev_list, list) {
- tdes_dd = tmp;
- break;
- }
- ctx->dd = tdes_dd;
- } else {
- tdes_dd = ctx->dd;
- }
+ /* One TDES IP per SoC. */
+ tdes_dd = list_first_entry_or_null(&atmel_tdes.dev_list,
+ struct atmel_tdes_dev, list);
spin_unlock_bh(&atmel_tdes.lock);
-
return tdes_dd;
}
@@ -320,7 +312,7 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd)
dd->buf_out, dd->buflen, dd->dma_size, 1);
if (count != dd->dma_size) {
err = -EINVAL;
- pr_err("not all data converted: %zu\n", count);
+ dev_dbg(dd->dev, "not all data converted: %zu\n", count);
}
}
@@ -337,24 +329,24 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd)
dd->buflen &= ~(DES_BLOCK_SIZE - 1);
if (!dd->buf_in || !dd->buf_out) {
- dev_err(dd->dev, "unable to alloc pages.\n");
+ dev_dbg(dd->dev, "unable to alloc pages.\n");
goto err_alloc;
}
/* MAP here */
dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in,
dd->buflen, DMA_TO_DEVICE);
- if (dma_mapping_error(dd->dev, dd->dma_addr_in)) {
- dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
- err = -EINVAL;
+ err = dma_mapping_error(dd->dev, dd->dma_addr_in);
+ if (err) {
+ dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
goto err_map_in;
}
dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out,
dd->buflen, DMA_FROM_DEVICE);
- if (dma_mapping_error(dd->dev, dd->dma_addr_out)) {
- dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
- err = -EINVAL;
+ err = dma_mapping_error(dd->dev, dd->dma_addr_out);
+ if (err) {
+ dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
goto err_map_out;
}
@@ -367,8 +359,6 @@ err_map_in:
err_alloc:
free_page((unsigned long)dd->buf_out);
free_page((unsigned long)dd->buf_in);
- if (err)
- pr_err("error: %d\n", err);
return err;
}
@@ -520,14 +510,14 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
if (!err) {
- dev_err(dd->dev, "dma_map_sg() error\n");
+ dev_dbg(dd->dev, "dma_map_sg() error\n");
return -EINVAL;
}
err = dma_map_sg(dd->dev, dd->out_sg, 1,
DMA_FROM_DEVICE);
if (!err) {
- dev_err(dd->dev, "dma_map_sg() error\n");
+ dev_dbg(dd->dev, "dma_map_sg() error\n");
dma_unmap_sg(dd->dev, dd->in_sg, 1,
DMA_TO_DEVICE);
return -EINVAL;
@@ -646,7 +636,6 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
rctx->mode &= TDES_FLAGS_MODE_MASK;
dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode;
dd->ctx = ctx;
- ctx->dd = dd;
err = atmel_tdes_write_ctrl(dd);
if (!err)
@@ -679,7 +668,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
dd->buf_out, dd->buflen, dd->dma_size, 1);
if (count != dd->dma_size) {
err = -EINVAL;
- pr_err("not all data converted: %zu\n", count);
+ dev_dbg(dd->dev, "not all data converted: %zu\n", count);
}
}
}
@@ -691,11 +680,15 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req);
+ struct device *dev = ctx->dd->dev;
+
+ if (!req->cryptlen)
+ return 0;
switch (mode & TDES_FLAGS_OPMODE_MASK) {
case TDES_FLAGS_CFB8:
if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of CFB8 blocks\n");
+ dev_dbg(dev, "request size is not exact amount of CFB8 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB8_BLOCK_SIZE;
@@ -703,7 +696,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
case TDES_FLAGS_CFB16:
if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of CFB16 blocks\n");
+ dev_dbg(dev, "request size is not exact amount of CFB16 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB16_BLOCK_SIZE;
@@ -711,7 +704,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
case TDES_FLAGS_CFB32:
if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of CFB32 blocks\n");
+ dev_dbg(dev, "request size is not exact amount of CFB32 blocks\n");
return -EINVAL;
}
ctx->block_size = CFB32_BLOCK_SIZE;
@@ -719,7 +712,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
default:
if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) {
- pr_err("request size is not exact amount of DES blocks\n");
+ dev_dbg(dev, "request size is not exact amount of DES blocks\n");
return -EINVAL;
}
ctx->block_size = DES_BLOCK_SIZE;
@@ -897,14 +890,13 @@ static int atmel_tdes_ofb_decrypt(struct skcipher_request *req)
static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm)
{
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct atmel_tdes_dev *dd;
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx));
- dd = atmel_tdes_find_dev(ctx);
- if (!dd)
+ ctx->dd = atmel_tdes_dev_alloc();
+ if (!ctx->dd)
return -ENODEV;
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx));
+
return 0;
}
@@ -999,7 +991,7 @@ static struct skcipher_alg tdes_algs[] = {
{
.base.cra_name = "ofb(des)",
.base.cra_driver_name = "atmel-ofb-des",
- .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_blocksize = 1,
.base.cra_alignmask = 0x7,
.min_keysize = DES_KEY_SIZE,
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 91808402e0bf..2ecb0e1f65d8 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -300,6 +300,9 @@ static int __sev_platform_shutdown_locked(int *error)
struct sev_device *sev = psp_master->sev_data;
int ret;
+ if (sev->state == SEV_STATE_UNINIT)
+ return 0;
+
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
if (ret)
return ret;
@@ -1019,6 +1022,20 @@ e_err:
return ret;
}
+static void sev_firmware_shutdown(struct sev_device *sev)
+{
+ sev_platform_shutdown(NULL);
+
+ if (sev_es_tmr) {
+ /* The TMR area was encrypted, flush it from the cache */
+ wbinvd_on_all_cpus();
+
+ free_pages((unsigned long)sev_es_tmr,
+ get_order(SEV_ES_TMR_SIZE));
+ sev_es_tmr = NULL;
+ }
+}
+
void sev_dev_destroy(struct psp_device *psp)
{
struct sev_device *sev = psp->sev_data;
@@ -1026,6 +1043,8 @@ void sev_dev_destroy(struct psp_device *psp)
if (!sev)
return;
+ sev_firmware_shutdown(sev);
+
if (sev->misc)
kref_put(&misc_dev->refcount, sev_exit);
@@ -1056,21 +1075,6 @@ void sev_pci_init(void)
if (sev_get_api_version())
goto err;
- /*
- * If platform is not in UNINIT state then firmware upgrade and/or
- * platform INIT command will fail. These command require UNINIT state.
- *
- * In a normal boot we should never run into case where the firmware
- * is not in UNINIT state on boot. But in case of kexec boot, a reboot
- * may not go through a typical shutdown sequence and may leave the
- * firmware in INIT or WORKING state.
- */
-
- if (sev->state != SEV_STATE_UNINIT) {
- sev_platform_shutdown(NULL);
- sev->state = SEV_STATE_UNINIT;
- }
-
if (sev_version_greater_or_equal(0, 15) &&
sev_update_firmware(sev->dev) == 0)
sev_get_api_version();
@@ -1115,17 +1119,10 @@ err:
void sev_pci_exit(void)
{
- if (!psp_master->sev_data)
- return;
-
- sev_platform_shutdown(NULL);
+ struct sev_device *sev = psp_master->sev_data;
- if (sev_es_tmr) {
- /* The TMR area was encrypted, flush it from the cache */
- wbinvd_on_all_cpus();
+ if (!sev)
+ return;
- free_pages((unsigned long)sev_es_tmr,
- get_order(SEV_ES_TMR_SIZE));
- sev_es_tmr = NULL;
- }
+ sev_firmware_shutdown(sev);
}
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 6fb6ba35f89d..88c672ad27e4 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -241,6 +241,17 @@ e_err:
return ret;
}
+static void sp_pci_shutdown(struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct sp_device *sp = dev_get_drvdata(dev);
+
+ if (!sp)
+ return;
+
+ sp_destroy(sp);
+}
+
static void sp_pci_remove(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
@@ -351,6 +362,12 @@ static const struct sp_dev_vdata dev_vdata[] = {
.psp_vdata = &pspv3,
#endif
},
+ { /* 5 */
+ .bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+ .psp_vdata = &pspv2,
+#endif
+ },
};
static const struct pci_device_id sp_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
@@ -359,6 +376,7 @@ static const struct pci_device_id sp_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] },
{ PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] },
{ PCI_VDEVICE(AMD, 0x1649), (kernel_ulong_t)&dev_vdata[4] },
+ { PCI_VDEVICE(AMD, 0x14CA), (kernel_ulong_t)&dev_vdata[5] },
/* Last entry must be zero */
{ 0, }
};
@@ -371,6 +389,7 @@ static struct pci_driver sp_pci_driver = {
.id_table = sp_pci_table,
.probe = sp_pci_probe,
.remove = sp_pci_remove,
+ .shutdown = sp_pci_shutdown,
.driver.pm = &sp_pci_pm_ops,
};
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 8b0640fb04be..65a641396c07 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/topology.h>
#include <linux/uacce.h>
#include "hpre.h"
@@ -81,6 +82,16 @@
#define HPRE_PREFETCH_DISABLE BIT(30)
#define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8))
+/* clock gate */
+#define HPRE_CLKGATE_CTL 0x301a10
+#define HPRE_PEH_CFG_AUTO_GATE 0x301a2c
+#define HPRE_CLUSTER_DYN_CTL 0x302010
+#define HPRE_CORE_SHB_CFG 0x302088
+#define HPRE_CLKGATE_CTL_EN BIT(0)
+#define HPRE_PEH_CFG_AUTO_GATE_EN BIT(0)
+#define HPRE_CLUSTER_DYN_CTL_EN BIT(0)
+#define HPRE_CORE_GATE_EN (BIT(30) | BIT(31))
+
#define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044
#define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0)
#define HPRE_WR_MSI_PORT BIT(2)
@@ -417,12 +428,63 @@ static void hpre_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
+static void hpre_enable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl(qm->io_base + HPRE_CLKGATE_CTL);
+ val |= HPRE_CLKGATE_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLKGATE_CTL);
+
+ val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+ val |= HPRE_PEH_CFG_AUTO_GATE_EN;
+ writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+
+ val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
+ val |= HPRE_CLUSTER_DYN_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
+
+ val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
+ val |= HPRE_CORE_GATE_EN;
+ writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
+}
+
+static void hpre_disable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl(qm->io_base + HPRE_CLKGATE_CTL);
+ val &= ~HPRE_CLKGATE_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLKGATE_CTL);
+
+ val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+ val &= ~HPRE_PEH_CFG_AUTO_GATE_EN;
+ writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
+
+ val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
+ val &= ~HPRE_CLUSTER_DYN_CTL_EN;
+ writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
+
+ val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
+ val &= ~HPRE_CORE_GATE_EN;
+ writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
+}
+
static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
u32 val;
int ret;
+ /* disabel dynamic clock gate before sram init */
+ hpre_disable_clock_gate(qm);
+
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE);
writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG);
@@ -473,6 +535,8 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
/* Config data buffer pasid needed by Kunpeng 920 */
hpre_config_pasid(qm);
+ hpre_enable_clock_gate(qm);
+
return ret;
}
@@ -595,10 +659,15 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct hpre_debugfs_file *file = filp->private_data;
+ struct hisi_qm *qm = hpre_file_to_qm(file);
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->type) {
case HPRE_CLEAR_ENABLE:
@@ -608,18 +677,25 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
val = hpre_cluster_inqry_read(file);
break;
default:
- spin_unlock_irq(&file->lock);
- return -EINVAL;
+ goto err_input;
}
spin_unlock_irq(&file->lock);
+
+ hisi_qm_put_dfx_access(qm);
ret = snprintf(tbuf, HPRE_DBGFS_VAL_MAX_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct hpre_debugfs_file *file = filp->private_data;
+ struct hisi_qm *qm = hpre_file_to_qm(file);
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
unsigned long val;
int len, ret;
@@ -639,6 +715,10 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->type) {
case HPRE_CLEAR_ENABLE:
@@ -655,12 +735,12 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
ret = -EINVAL;
goto err_input;
}
- spin_unlock_irq(&file->lock);
- return count;
+ ret = count;
err_input:
spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -700,6 +780,24 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
hpre_debugfs_atomic64_set, "%llu\n");
+static int hpre_com_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
+
+static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
+
static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
enum hpre_ctrl_dbgfs_file type, int indx)
{
@@ -737,8 +835,11 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm)
regset->regs = hpre_com_dfx_regs;
regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs);
regset->base = qm->io_base;
+ regset->dev = dev;
+
+ debugfs_create_file("regs", 0444, qm->debug.debug_root,
+ regset, &hpre_com_regs_fops);
- debugfs_create_regset32("regs", 0444, qm->debug.debug_root, regset);
return 0;
}
@@ -764,8 +865,10 @@ static int hpre_cluster_debugfs_init(struct hisi_qm *qm)
regset->regs = hpre_cluster_dfx_regs;
regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs);
regset->base = qm->io_base + hpre_cluster_offsets[i];
+ regset->dev = dev;
- debugfs_create_regset32("regs", 0444, tmp_d, regset);
+ debugfs_create_file("regs", 0444, tmp_d, regset,
+ &hpre_cluster_regs_fops);
ret = hpre_create_debugfs_file(qm, tmp_d, HPRE_CLUSTER_CTRL,
i + HPRE_CLUSTER_CTRL);
if (ret)
@@ -1017,6 +1120,8 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_with_alg_register;
}
+ hisi_qm_pm_init(qm);
+
return 0;
err_with_alg_register:
@@ -1040,6 +1145,7 @@ static void hpre_remove(struct pci_dev *pdev)
struct hisi_qm *qm = pci_get_drvdata(pdev);
int ret;
+ hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &hpre_devices);
hisi_qm_alg_unregister(qm, &hpre_devices);
if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
@@ -1062,6 +1168,10 @@ static void hpre_remove(struct pci_dev *pdev)
hisi_qm_uninit(qm);
}
+static const struct dev_pm_ops hpre_pm_ops = {
+ SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
+};
+
static const struct pci_error_handlers hpre_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@@ -1078,6 +1188,7 @@ static struct pci_driver hpre_pci_driver = {
hisi_qm_sriov_configure : NULL,
.err_handler = &hpre_err_handler,
.shutdown = hisi_qm_dev_shutdown,
+ .driver.pm = &hpre_pm_ops,
};
static void hpre_register_debugfs(void)
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 1d67f94a1d56..369562d34d66 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4,12 +4,12 @@
#include <linux/acpi.h>
#include <linux/aer.h>
#include <linux/bitmap.h>
-#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/idr.h>
#include <linux/io.h>
#include <linux/irqreturn.h>
#include <linux/log2.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uacce.h>
@@ -270,6 +270,8 @@
#define QM_QOS_MAX_CIR_S 11
#define QM_QOS_VAL_MAX_LEN 32
+#define QM_AUTOSUSPEND_DELAY 3000
+
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \
@@ -734,6 +736,34 @@ static u32 qm_get_irq_num_v3(struct hisi_qm *qm)
return QM_IRQ_NUM_VF_V3;
}
+static int qm_pm_get_sync(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return 0;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to get_sync(%d).\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void qm_pm_put_sync(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return;
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+}
+
static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
{
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
@@ -1173,16 +1203,13 @@ static struct hisi_qm *file_to_qm(struct debugfs_file *file)
return container_of(debug, struct hisi_qm, debug);
}
-static u32 current_q_read(struct debugfs_file *file)
+static u32 current_q_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
}
-static int current_q_write(struct debugfs_file *file, u32 val)
+static int current_q_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val >= qm->debug.curr_qm_qp_num)
@@ -1199,18 +1226,14 @@ static int current_q_write(struct debugfs_file *file, u32 val)
return 0;
}
-static u32 clear_enable_read(struct debugfs_file *file)
+static u32 clear_enable_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
}
/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
-static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
+static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
{
- struct hisi_qm *qm = file_to_qm(file);
-
if (rd_clr_ctrl > 1)
return -EINVAL;
@@ -1219,16 +1242,13 @@ static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
return 0;
}
-static u32 current_qm_read(struct debugfs_file *file)
+static u32 current_qm_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + QM_DFX_MB_CNT_VF);
}
-static int current_qm_write(struct debugfs_file *file, u32 val)
+static int current_qm_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val > qm->vfs_num)
@@ -1259,29 +1279,39 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf,
{
struct debugfs_file *file = filp->private_data;
enum qm_debug_file index = file->index;
+ struct hisi_qm *qm = file_to_qm(file);
char tbuf[QM_DBG_TMP_BUF_LEN];
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
mutex_lock(&file->lock);
switch (index) {
case CURRENT_QM:
- val = current_qm_read(file);
+ val = current_qm_read(qm);
break;
case CURRENT_Q:
- val = current_q_read(file);
+ val = current_q_read(qm);
break;
case CLEAR_ENABLE:
- val = clear_enable_read(file);
+ val = clear_enable_read(qm);
break;
default:
- mutex_unlock(&file->lock);
- return -EINVAL;
+ goto err_input;
}
mutex_unlock(&file->lock);
+ hisi_qm_put_dfx_access(qm);
ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ mutex_unlock(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
@@ -1289,6 +1319,7 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
{
struct debugfs_file *file = filp->private_data;
enum qm_debug_file index = file->index;
+ struct hisi_qm *qm = file_to_qm(file);
unsigned long val;
char tbuf[QM_DBG_TMP_BUF_LEN];
int len, ret;
@@ -1308,22 +1339,28 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
mutex_lock(&file->lock);
switch (index) {
case CURRENT_QM:
- ret = current_qm_write(file, val);
+ ret = current_qm_write(qm, val);
break;
case CURRENT_Q:
- ret = current_q_write(file, val);
+ ret = current_q_write(qm, val);
break;
case CLEAR_ENABLE:
- ret = clear_enable_write(file, val);
+ ret = clear_enable_write(qm, val);
break;
default:
ret = -EINVAL;
}
mutex_unlock(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+
if (ret)
return ret;
@@ -1337,13 +1374,8 @@ static const struct file_operations qm_debug_fops = {
.write = qm_debug_write,
};
-struct qm_dfx_registers {
- char *reg_name;
- u64 reg_offset;
-};
-
#define CNT_CYC_REGS_NUM 10
-static struct qm_dfx_registers qm_dfx_regs[] = {
+static const struct debugfs_reg32 qm_dfx_regs[] = {
/* XXX_CNT are reading clear register */
{"QM_ECC_1BIT_CNT ", 0x104000ull},
{"QM_ECC_MBIT_CNT ", 0x104008ull},
@@ -1369,31 +1401,59 @@ static struct qm_dfx_registers qm_dfx_regs[] = {
{"QM_DFX_FF_ST5 ", 0x1040dcull},
{"QM_DFX_FF_ST6 ", 0x1040e0ull},
{"QM_IN_IDLE_ST ", 0x1040e4ull},
- { NULL, 0}
};
-static struct qm_dfx_registers qm_vf_dfx_regs[] = {
+static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
{"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
- { NULL, 0}
};
-static int qm_regs_show(struct seq_file *s, void *unused)
+/**
+ * hisi_qm_regs_dump() - Dump registers's value.
+ * @s: debugfs file handle.
+ * @regset: accelerator registers information.
+ *
+ * Dump accelerator registers.
+ */
+void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
{
- struct hisi_qm *qm = s->private;
- struct qm_dfx_registers *regs;
+ struct pci_dev *pdev = to_pci_dev(regset->dev);
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ const struct debugfs_reg32 *regs = regset->regs;
+ int regs_len = regset->nregs;
+ int i, ret;
u32 val;
- if (qm->fun_type == QM_HW_PF)
- regs = qm_dfx_regs;
- else
- regs = qm_vf_dfx_regs;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return;
- while (regs->reg_name) {
- val = readl(qm->io_base + regs->reg_offset);
- seq_printf(s, "%s= 0x%08x\n", regs->reg_name, val);
- regs++;
+ for (i = 0; i < regs_len; i++) {
+ val = readl(regset->base + regs[i].offset);
+ seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
}
+ hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
+
+static int qm_regs_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+ struct debugfs_regset32 regset;
+
+ if (qm->fun_type == QM_HW_PF) {
+ regset.regs = qm_dfx_regs;
+ regset.nregs = ARRAY_SIZE(qm_dfx_regs);
+ } else {
+ regset.regs = qm_vf_dfx_regs;
+ regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs);
+ }
+
+ regset.base = qm->io_base;
+ regset.dev = &qm->pdev->dev;
+
+ hisi_qm_regs_dump(s, &regset);
+
return 0;
}
@@ -1823,16 +1883,24 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
if (*pos)
return 0;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
/* Judge if the instance is being reset. */
if (unlikely(atomic_read(&qm->status.flags) == QM_STOP))
return 0;
- if (count > QM_DBG_WRITE_LEN)
- return -ENOSPC;
+ if (count > QM_DBG_WRITE_LEN) {
+ ret = -ENOSPC;
+ goto put_dfx_access;
+ }
cmd_buf = memdup_user_nul(buffer, count);
- if (IS_ERR(cmd_buf))
- return PTR_ERR(cmd_buf);
+ if (IS_ERR(cmd_buf)) {
+ ret = PTR_ERR(cmd_buf);
+ goto put_dfx_access;
+ }
cmd_buf_tmp = strchr(cmd_buf, '\n');
if (cmd_buf_tmp) {
@@ -1843,12 +1911,16 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
ret = qm_cmd_write_dump(qm, cmd_buf);
if (ret) {
kfree(cmd_buf);
- return ret;
+ goto put_dfx_access;
}
kfree(cmd_buf);
- return count;
+ ret = count;
+
+put_dfx_access:
+ hisi_qm_put_dfx_access(qm);
+ return ret;
}
static const struct file_operations qm_cmd_fops = {
@@ -2445,11 +2517,19 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
{
struct hisi_qp *qp;
+ int ret;
+
+ ret = qm_pm_get_sync(qm);
+ if (ret)
+ return ERR_PTR(ret);
down_write(&qm->qps_lock);
qp = qm_create_qp_nolock(qm, alg_type);
up_write(&qm->qps_lock);
+ if (IS_ERR(qp))
+ qm_pm_put_sync(qm);
+
return qp;
}
EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
@@ -2475,6 +2555,8 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
idr_remove(&qm->qp_idr, qp->qp_id);
up_write(&qm->qps_lock);
+
+ qm_pm_put_sync(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
@@ -3200,6 +3282,10 @@ static void hisi_qm_pre_init(struct hisi_qm *qm)
init_rwsem(&qm->qps_lock);
qm->qp_in_used = 0;
qm->misc_ctl = false;
+ if (qm->fun_type == QM_HW_PF && qm->ver > QM_HW_V2) {
+ if (!acpi_device_power_manageable(ACPI_COMPANION(&pdev->dev)))
+ dev_info(&pdev->dev, "_PS0 and _PR0 are not defined");
+ }
}
static void qm_cmd_uninit(struct hisi_qm *qm)
@@ -4057,10 +4143,15 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
u32 qos_val, ir;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
/* Mailbox and reset cannot be operated at the same time */
if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
pci_err(qm->pdev, "dev resetting, read alg qos failed!\n");
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto err_put_dfx_access;
}
if (qm->fun_type == QM_HW_PF) {
@@ -4079,6 +4170,8 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
err_get_status:
clear_bit(QM_RESETTING, &qm->misc_ctl);
+err_put_dfx_access:
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -4159,15 +4252,23 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
fun_index = device * 8 + function;
+ ret = qm_pm_get_sync(qm);
+ if (ret) {
+ ret = -EINVAL;
+ goto err_get_status;
+ }
+
ret = qm_func_shaper_enable(qm, fun_index, val);
if (ret) {
pci_err(qm->pdev, "failed to enable function shaper!\n");
ret = -EINVAL;
- goto err_get_status;
+ goto err_put_sync;
}
- ret = count;
+ ret = count;
+err_put_sync:
+ qm_pm_put_sync(qm);
err_get_status:
clear_bit(QM_RESETTING, &qm->misc_ctl);
return ret;
@@ -4245,7 +4346,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
*/
void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
{
- struct qm_dfx_registers *regs;
+ const struct debugfs_reg32 *regs;
int i;
/* clear current_qm */
@@ -4264,7 +4365,7 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
regs = qm_dfx_regs;
for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
- readl(qm->io_base + regs->reg_offset);
+ readl(qm->io_base + regs->offset);
regs++;
}
@@ -4287,19 +4388,23 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
struct hisi_qm *qm = pci_get_drvdata(pdev);
int pre_existing_vfs, num_vfs, total_vfs, ret;
+ ret = qm_pm_get_sync(qm);
+ if (ret)
+ return ret;
+
total_vfs = pci_sriov_get_totalvfs(pdev);
pre_existing_vfs = pci_num_vf(pdev);
if (pre_existing_vfs) {
pci_err(pdev, "%d VFs already enabled. Please disable pre-enabled VFs!\n",
pre_existing_vfs);
- return 0;
+ goto err_put_sync;
}
num_vfs = min_t(int, max_vfs, total_vfs);
ret = qm_vf_q_assign(qm, num_vfs);
if (ret) {
pci_err(pdev, "Can't assign queues for VF!\n");
- return ret;
+ goto err_put_sync;
}
qm->vfs_num = num_vfs;
@@ -4308,12 +4413,16 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
if (ret) {
pci_err(pdev, "Can't enable VF!\n");
qm_clear_vft_config(qm);
- return ret;
+ goto err_put_sync;
}
pci_info(pdev, "VF enabled, vfs_num(=%d)!\n", num_vfs);
return num_vfs;
+
+err_put_sync:
+ qm_pm_put_sync(qm);
+ return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable);
@@ -4328,6 +4437,7 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
int total_vfs = pci_sriov_get_totalvfs(qm->pdev);
+ int ret;
if (pci_vfs_assigned(pdev)) {
pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n");
@@ -4343,8 +4453,13 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
pci_disable_sriov(pdev);
/* clear vf function shaper configure array */
memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs);
+ ret = qm_clear_vft_config(qm);
+ if (ret)
+ return ret;
- return qm_clear_vft_config(qm);
+ qm_pm_put_sync(qm);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable);
@@ -5164,11 +5279,18 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
struct hisi_qm *qm = container_of(rst_work, struct hisi_qm, rst_work);
int ret;
+ ret = qm_pm_get_sync(qm);
+ if (ret) {
+ clear_bit(QM_RST_SCHED, &qm->misc_ctl);
+ return;
+ }
+
/* reset pcie device controller */
ret = qm_controller_reset(qm);
if (ret)
dev_err(&qm->pdev->dev, "controller reset failed (%d)\n", ret);
+ qm_pm_put_sync(qm);
}
static void qm_pf_reset_vf_prepare(struct hisi_qm *qm,
@@ -5680,6 +5802,194 @@ err_pci_init:
}
EXPORT_SYMBOL_GPL(hisi_qm_init);
+/**
+ * hisi_qm_get_dfx_access() - Try to get dfx access.
+ * @qm: pointer to accelerator device.
+ *
+ * Try to get dfx access, then user can get message.
+ *
+ * If device is in suspended, return failure, otherwise
+ * bump up the runtime PM usage counter.
+ */
+int hisi_qm_get_dfx_access(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (pm_runtime_suspended(dev)) {
+ dev_info(dev, "can not read/write - device in suspended.\n");
+ return -EAGAIN;
+ }
+
+ return qm_pm_get_sync(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_get_dfx_access);
+
+/**
+ * hisi_qm_put_dfx_access() - Put dfx access.
+ * @qm: pointer to accelerator device.
+ *
+ * Put dfx access, drop runtime PM usage counter.
+ */
+void hisi_qm_put_dfx_access(struct hisi_qm *qm)
+{
+ qm_pm_put_sync(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_put_dfx_access);
+
+/**
+ * hisi_qm_pm_init() - Initialize qm runtime PM.
+ * @qm: pointer to accelerator device.
+ *
+ * Function that initialize qm runtime PM.
+ */
+void hisi_qm_pm_init(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return;
+
+ pm_runtime_set_autosuspend_delay(dev, QM_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_put_noidle(dev);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_pm_init);
+
+/**
+ * hisi_qm_pm_uninit() - Uninitialize qm runtime PM.
+ * @qm: pointer to accelerator device.
+ *
+ * Function that uninitialize qm runtime PM.
+ */
+void hisi_qm_pm_uninit(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
+ return;
+
+ pm_runtime_get_noresume(dev);
+ pm_runtime_dont_use_autosuspend(dev);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_pm_uninit);
+
+static int qm_prepare_for_suspend(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+ u32 val;
+
+ ret = qm->ops->set_msi(qm, false);
+ if (ret) {
+ pci_err(pdev, "failed to disable MSI before suspending!\n");
+ return ret;
+ }
+
+ /* shutdown OOO register */
+ writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN,
+ qm->io_base + ACC_MASTER_GLOBAL_CTRL);
+
+ ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN,
+ val,
+ (val == ACC_MASTER_TRANS_RETURN_RW),
+ POLL_PERIOD, POLL_TIMEOUT);
+ if (ret) {
+ pci_emerg(pdev, "Bus lock! Please reset system.\n");
+ return ret;
+ }
+
+ ret = qm_set_pf_mse(qm, false);
+ if (ret)
+ pci_err(pdev, "failed to disable MSE before suspending!\n");
+
+ return ret;
+}
+
+static int qm_rebuild_for_resume(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+
+ ret = qm_set_pf_mse(qm, true);
+ if (ret) {
+ pci_err(pdev, "failed to enable MSE after resuming!\n");
+ return ret;
+ }
+
+ ret = qm->ops->set_msi(qm, true);
+ if (ret) {
+ pci_err(pdev, "failed to enable MSI after resuming!\n");
+ return ret;
+ }
+
+ ret = qm_dev_hw_init(qm);
+ if (ret) {
+ pci_err(pdev, "failed to init device after resuming\n");
+ return ret;
+ }
+
+ qm_cmd_init(qm);
+ hisi_qm_dev_err_init(qm);
+
+ return 0;
+}
+
+/**
+ * hisi_qm_suspend() - Runtime suspend of given device.
+ * @dev: device to suspend.
+ *
+ * Function that suspend the device.
+ */
+int hisi_qm_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int ret;
+
+ pci_info(pdev, "entering suspended state\n");
+
+ ret = hisi_qm_stop(qm, QM_NORMAL);
+ if (ret) {
+ pci_err(pdev, "failed to stop qm(%d)\n", ret);
+ return ret;
+ }
+
+ ret = qm_prepare_for_suspend(qm);
+ if (ret)
+ pci_err(pdev, "failed to prepare suspended(%d)\n", ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_suspend);
+
+/**
+ * hisi_qm_resume() - Runtime resume of given device.
+ * @dev: device to resume.
+ *
+ * Function that resume the device.
+ */
+int hisi_qm_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int ret;
+
+ pci_info(pdev, "resuming from suspend state\n");
+
+ ret = qm_rebuild_for_resume(qm);
+ if (ret) {
+ pci_err(pdev, "failed to rebuild resume(%d)\n", ret);
+ return ret;
+ }
+
+ ret = hisi_qm_start(qm);
+ if (ret)
+ pci_err(pdev, "failed to start qm(%d)\n", ret);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_resume);
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver");
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index 035eaf8c442d..3068093229a5 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -4,6 +4,7 @@
#define HISI_ACC_QM_H
#include <linux/bitfield.h>
+#include <linux/debugfs.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -430,4 +431,11 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev);
void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
+int hisi_qm_resume(struct device *dev);
+int hisi_qm_suspend(struct device *dev);
+void hisi_qm_pm_uninit(struct hisi_qm *qm);
+void hisi_qm_pm_init(struct hisi_qm *qm);
+int hisi_qm_get_dfx_access(struct hisi_qm *qm);
+void hisi_qm_put_dfx_access(struct hisi_qm *qm);
+void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 018415b9840a..d97cf02b1df7 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -157,11 +157,6 @@ struct sec_ctx {
struct device *dev;
};
-enum sec_endian {
- SEC_LE = 0,
- SEC_32BE,
- SEC_64BE
-};
enum sec_debug_file_index {
SEC_CLEAR_ENABLE,
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 490db7bccf61..90551bf38b52 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/topology.h>
#include <linux/uacce.h>
@@ -57,10 +58,16 @@
#define SEC_MEM_START_INIT_REG 0x301100
#define SEC_MEM_INIT_DONE_REG 0x301104
+/* clock gating */
#define SEC_CONTROL_REG 0x301200
-#define SEC_TRNG_EN_SHIFT 8
+#define SEC_DYNAMIC_GATE_REG 0x30121c
+#define SEC_CORE_AUTO_GATE 0x30212c
+#define SEC_DYNAMIC_GATE_EN 0x7bff
+#define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0)
#define SEC_CLK_GATE_ENABLE BIT(3)
#define SEC_CLK_GATE_DISABLE (~BIT(3))
+
+#define SEC_TRNG_EN_SHIFT 8
#define SEC_AXI_SHUTDOWN_ENABLE BIT(12)
#define SEC_AXI_SHUTDOWN_DISABLE 0xFFFFEFFF
@@ -312,31 +319,20 @@ static const struct pci_device_id sec_dev_ids[] = {
};
MODULE_DEVICE_TABLE(pci, sec_dev_ids);
-static u8 sec_get_endian(struct hisi_qm *qm)
+static void sec_set_endian(struct hisi_qm *qm)
{
u32 reg;
- /*
- * As for VF, it is a wrong way to get endian setting by
- * reading a register of the engine
- */
- if (qm->pdev->is_virtfn) {
- dev_err_ratelimited(&qm->pdev->dev,
- "cannot access a register in VF!\n");
- return SEC_LE;
- }
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
- /* BD little endian mode */
- if (!(reg & BIT(0)))
- return SEC_LE;
+ reg &= ~(BIT(1) | BIT(0));
+ if (!IS_ENABLED(CONFIG_64BIT))
+ reg |= BIT(1);
- /* BD 32-bits big endian mode */
- else if (!(reg & BIT(1)))
- return SEC_32BE;
- /* BD 64-bits big endian mode */
- else
- return SEC_64BE;
+ if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ reg |= BIT(0);
+
+ writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
}
static void sec_open_sva_prefetch(struct hisi_qm *qm)
@@ -378,15 +374,43 @@ static void sec_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
+static void sec_enable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
+ val |= SEC_CLK_GATE_ENABLE;
+ writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
+
+ val = readl(qm->io_base + SEC_DYNAMIC_GATE_REG);
+ val |= SEC_DYNAMIC_GATE_EN;
+ writel(val, qm->io_base + SEC_DYNAMIC_GATE_REG);
+
+ val = readl(qm->io_base + SEC_CORE_AUTO_GATE);
+ val |= SEC_CORE_AUTO_GATE_EN;
+ writel(val, qm->io_base + SEC_CORE_AUTO_GATE);
+}
+
+static void sec_disable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ /* Kunpeng920 needs to close clock gating */
+ val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
+ val &= SEC_CLK_GATE_DISABLE;
+ writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
+}
+
static int sec_engine_init(struct hisi_qm *qm)
{
int ret;
u32 reg;
- /* disable clock gate control */
- reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
- reg &= SEC_CLK_GATE_DISABLE;
- writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
+ /* disable clock gate control before mem init */
+ sec_disable_clock_gate(qm);
writel_relaxed(0x1, qm->io_base + SEC_MEM_START_INIT_REG);
@@ -429,9 +453,9 @@ static int sec_engine_init(struct hisi_qm *qm)
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
/* config endian */
- reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
- reg |= sec_get_endian(qm);
- writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
+ sec_set_endian(qm);
+
+ sec_enable_clock_gate(qm);
return 0;
}
@@ -533,17 +557,14 @@ static void sec_hw_error_disable(struct hisi_qm *qm)
writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
}
-static u32 sec_clear_enable_read(struct sec_debug_file *file)
+static u32 sec_clear_enable_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file->qm;
-
return readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) &
SEC_CTRL_CNT_CLR_CE_BIT;
}
-static int sec_clear_enable_write(struct sec_debug_file *file, u32 val)
+static int sec_clear_enable_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file->qm;
u32 tmp;
if (val != 1 && val)
@@ -561,24 +582,34 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf,
{
struct sec_debug_file *file = filp->private_data;
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
+ struct hisi_qm *qm = file->qm;
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case SEC_CLEAR_ENABLE:
- val = sec_clear_enable_read(file);
+ val = sec_clear_enable_read(qm);
break;
default:
- spin_unlock_irq(&file->lock);
- return -EINVAL;
+ goto err_input;
}
spin_unlock_irq(&file->lock);
- ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
+ hisi_qm_put_dfx_access(qm);
+ ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
@@ -586,6 +617,7 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
{
struct sec_debug_file *file = filp->private_data;
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
+ struct hisi_qm *qm = file->qm;
unsigned long val;
int len, ret;
@@ -604,11 +636,15 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case SEC_CLEAR_ENABLE:
- ret = sec_clear_enable_write(file, val);
+ ret = sec_clear_enable_write(qm, val);
if (ret)
goto err_input;
break;
@@ -617,12 +653,11 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
goto err_input;
}
- spin_unlock_irq(&file->lock);
-
- return count;
+ ret = count;
err_input:
spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -653,6 +688,15 @@ static int sec_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get,
sec_debugfs_atomic64_set, "%lld\n");
+static int sec_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(sec_regs);
+
static int sec_core_debug_init(struct hisi_qm *qm)
{
struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
@@ -671,9 +715,10 @@ static int sec_core_debug_init(struct hisi_qm *qm)
regset->regs = sec_dfx_regs;
regset->nregs = ARRAY_SIZE(sec_dfx_regs);
regset->base = qm->io_base;
+ regset->dev = dev;
if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
- debugfs_create_regset32("regs", 0444, tmp_d, regset);
+ debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
@@ -981,10 +1026,13 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_alg_unregister;
}
+ hisi_qm_pm_init(qm);
+
return 0;
err_alg_unregister:
- hisi_qm_alg_unregister(qm, &sec_devices);
+ if (qm->qp_num >= ctx_q_num)
+ hisi_qm_alg_unregister(qm, &sec_devices);
err_qm_stop:
sec_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
@@ -999,6 +1047,7 @@ static void sec_remove(struct pci_dev *pdev)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
+ hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &sec_devices);
if (qm->qp_num >= ctx_q_num)
hisi_qm_alg_unregister(qm, &sec_devices);
@@ -1018,6 +1067,10 @@ static void sec_remove(struct pci_dev *pdev)
sec_qm_uninit(qm);
}
+static const struct dev_pm_ops sec_pm_ops = {
+ SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
+};
+
static const struct pci_error_handlers sec_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@@ -1033,6 +1086,7 @@ static struct pci_driver sec_pci_driver = {
.err_handler = &sec_err_handler,
.sriov_configure = hisi_qm_sriov_configure,
.shutdown = hisi_qm_dev_shutdown,
+ .driver.pm = &sec_pm_ops,
};
static void sec_register_debugfs(void)
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index f8482ceebf2a..7148201ce76e 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/topology.h>
#include <linux/uacce.h>
@@ -107,6 +108,14 @@
#define HZIP_DELAY_1_US 1
#define HZIP_POLL_TIMEOUT_US 1000
+/* clock gating */
+#define HZIP_PEH_CFG_AUTO_GATE 0x3011A8
+#define HZIP_PEH_CFG_AUTO_GATE_EN BIT(0)
+#define HZIP_CORE_GATED_EN GENMASK(15, 8)
+#define HZIP_CORE_GATED_OOO_EN BIT(29)
+#define HZIP_CLOCK_GATED_EN (HZIP_CORE_GATED_EN | \
+ HZIP_CORE_GATED_OOO_EN)
+
static const char hisi_zip_name[] = "hisi_zip";
static struct dentry *hzip_debugfs_root;
@@ -312,6 +321,22 @@ static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm)
pci_err(qm->pdev, "failed to close sva prefetch\n");
}
+static void hisi_zip_enable_clock_gate(struct hisi_qm *qm)
+{
+ u32 val;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ val = readl(qm->io_base + HZIP_CLOCK_GATE_CTRL);
+ val |= HZIP_CLOCK_GATED_EN;
+ writel(val, qm->io_base + HZIP_CLOCK_GATE_CTRL);
+
+ val = readl(qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
+ val |= HZIP_PEH_CFG_AUTO_GATE_EN;
+ writel(val, qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
+}
+
static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
{
void __iomem *base = qm->io_base;
@@ -359,6 +384,8 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL);
+ hisi_zip_enable_clock_gate(qm);
+
return 0;
}
@@ -423,17 +450,14 @@ static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
return &hisi_zip->qm;
}
-static u32 clear_enable_read(struct ctrl_debug_file *file)
+static u32 clear_enable_read(struct hisi_qm *qm)
{
- struct hisi_qm *qm = file_to_qm(file);
-
return readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) &
HZIP_SOFT_CTRL_CNT_CLR_CE_BIT;
}
-static int clear_enable_write(struct ctrl_debug_file *file, u32 val)
+static int clear_enable_write(struct hisi_qm *qm, u32 val)
{
- struct hisi_qm *qm = file_to_qm(file);
u32 tmp;
if (val != 1 && val != 0)
@@ -450,22 +474,33 @@ static ssize_t hisi_zip_ctrl_debug_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct ctrl_debug_file *file = filp->private_data;
+ struct hisi_qm *qm = file_to_qm(file);
char tbuf[HZIP_BUF_SIZE];
u32 val;
int ret;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case HZIP_CLEAR_ENABLE:
- val = clear_enable_read(file);
+ val = clear_enable_read(qm);
break;
default:
- spin_unlock_irq(&file->lock);
- return -EINVAL;
+ goto err_input;
}
spin_unlock_irq(&file->lock);
+
+ hisi_qm_put_dfx_access(qm);
ret = scnprintf(tbuf, sizeof(tbuf), "%u\n", val);
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
}
static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
@@ -473,6 +508,7 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
size_t count, loff_t *pos)
{
struct ctrl_debug_file *file = filp->private_data;
+ struct hisi_qm *qm = file_to_qm(file);
char tbuf[HZIP_BUF_SIZE];
unsigned long val;
int len, ret;
@@ -491,10 +527,14 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
if (kstrtoul(tbuf, 0, &val))
return -EFAULT;
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
spin_lock_irq(&file->lock);
switch (file->index) {
case HZIP_CLEAR_ENABLE:
- ret = clear_enable_write(file, val);
+ ret = clear_enable_write(qm, val);
if (ret)
goto err_input;
break;
@@ -502,12 +542,12 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
ret = -EINVAL;
goto err_input;
}
- spin_unlock_irq(&file->lock);
- return count;
+ ret = count;
err_input:
spin_unlock_irq(&file->lock);
+ hisi_qm_put_dfx_access(qm);
return ret;
}
@@ -538,6 +578,15 @@ static int zip_debugfs_atomic64_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(zip_atomic64_ops, zip_debugfs_atomic64_get,
zip_debugfs_atomic64_set, "%llu\n");
+static int hisi_zip_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hisi_zip_regs);
+
static int hisi_zip_core_debug_init(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
@@ -560,9 +609,11 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm)
regset->regs = hzip_dfx_regs;
regset->nregs = ARRAY_SIZE(hzip_dfx_regs);
regset->base = qm->io_base + core_offsets[i];
+ regset->dev = dev;
tmp_d = debugfs_create_dir(buf, qm->debug.debug_root);
- debugfs_create_regset32("regs", 0444, tmp_d, regset);
+ debugfs_create_file("regs", 0444, tmp_d, regset,
+ &hisi_zip_regs_fops);
}
return 0;
@@ -898,6 +949,8 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_qm_alg_unregister;
}
+ hisi_qm_pm_init(qm);
+
return 0;
err_qm_alg_unregister:
@@ -920,6 +973,7 @@ static void hisi_zip_remove(struct pci_dev *pdev)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
+ hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &zip_devices);
hisi_qm_alg_unregister(qm, &zip_devices);
@@ -932,6 +986,10 @@ static void hisi_zip_remove(struct pci_dev *pdev)
hisi_zip_qm_uninit(qm);
}
+static const struct dev_pm_ops hisi_zip_pm_ops = {
+ SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
+};
+
static const struct pci_error_handlers hisi_zip_err_handler = {
.error_detected = hisi_qm_dev_err_detected,
.slot_reset = hisi_qm_dev_slot_reset,
@@ -948,6 +1006,7 @@ static struct pci_driver hisi_zip_pci_driver = {
hisi_qm_sriov_configure : NULL,
.err_handler = &hisi_zip_err_handler,
.shutdown = hisi_qm_dev_shutdown,
+ .driver.pm = &hisi_zip_pm_ops,
};
static void hisi_zip_register_debugfs(void)
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index d6a7784d2988..d19e5ffb5104 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -170,15 +170,19 @@ static struct dcp *global_sdcp;
static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
{
+ int dma_err;
struct dcp *sdcp = global_sdcp;
const int chan = actx->chan;
uint32_t stat;
unsigned long ret;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
-
dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
DMA_TO_DEVICE);
+ dma_err = dma_mapping_error(sdcp->dev, desc_phys);
+ if (dma_err)
+ return dma_err;
+
reinit_completion(&sdcp->completion[chan]);
/* Clear status register. */
@@ -216,18 +220,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
struct skcipher_request *req, int init)
{
+ dma_addr_t key_phys, src_phys, dst_phys;
struct dcp *sdcp = global_sdcp;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
int ret;
- dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
- 2 * AES_KEYSIZE_128,
- DMA_TO_DEVICE);
- dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
- DCP_BUF_SZ, DMA_TO_DEVICE);
- dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
- DCP_BUF_SZ, DMA_FROM_DEVICE);
+ key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
+ 2 * AES_KEYSIZE_128, DMA_TO_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, key_phys);
+ if (ret)
+ return ret;
+
+ src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
+ DCP_BUF_SZ, DMA_TO_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, src_phys);
+ if (ret)
+ goto err_src;
+
+ dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
+ DCP_BUF_SZ, DMA_FROM_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, dst_phys);
+ if (ret)
+ goto err_dst;
if (actx->fill % AES_BLOCK_SIZE) {
dev_err(sdcp->dev, "Invalid block size!\n");
@@ -265,10 +280,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
ret = mxs_dcp_start_dma(actx);
aes_done_run:
+ dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
+err_dst:
+ dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
+err_src:
dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
DMA_TO_DEVICE);
- dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
- dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
return ret;
}
@@ -283,21 +300,20 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
struct scatterlist *dst = req->dst;
struct scatterlist *src = req->src;
- const int nents = sg_nents(req->src);
+ int dst_nents = sg_nents(dst);
const int out_off = DCP_BUF_SZ;
uint8_t *in_buf = sdcp->coh->aes_in_buf;
uint8_t *out_buf = sdcp->coh->aes_out_buf;
- uint8_t *out_tmp, *src_buf, *dst_buf = NULL;
uint32_t dst_off = 0;
+ uint8_t *src_buf = NULL;
uint32_t last_out_len = 0;
uint8_t *key = sdcp->coh->aes_key;
int ret = 0;
- int split = 0;
- unsigned int i, len, clen, rem = 0, tlen = 0;
+ unsigned int i, len, clen, tlen = 0;
int init = 0;
bool limit_hit = false;
@@ -315,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
}
- for_each_sg(req->src, src, nents, i) {
+ for_each_sg(req->src, src, sg_nents(src), i) {
src_buf = sg_virt(src);
len = sg_dma_len(src);
tlen += len;
@@ -340,34 +356,17 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
* submit the buffer.
*/
if (actx->fill == out_off || sg_is_last(src) ||
- limit_hit) {
+ limit_hit) {
ret = mxs_dcp_run_aes(actx, req, init);
if (ret)
return ret;
init = 0;
- out_tmp = out_buf;
+ sg_pcopy_from_buffer(dst, dst_nents, out_buf,
+ actx->fill, dst_off);
+ dst_off += actx->fill;
last_out_len = actx->fill;
- while (dst && actx->fill) {
- if (!split) {
- dst_buf = sg_virt(dst);
- dst_off = 0;
- }
- rem = min(sg_dma_len(dst) - dst_off,
- actx->fill);
-
- memcpy(dst_buf + dst_off, out_tmp, rem);
- out_tmp += rem;
- dst_off += rem;
- actx->fill -= rem;
-
- if (dst_off == sg_dma_len(dst)) {
- dst = sg_next(dst);
- split = 0;
- } else {
- split = 1;
- }
- }
+ actx->fill = 0;
}
} while (len);
@@ -557,6 +556,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf,
DCP_BUF_SZ, DMA_TO_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, buf_phys);
+ if (ret)
+ return ret;
+
/* Fill in the DMA descriptor. */
desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
MXS_DCP_CONTROL0_INTERRUPT |
@@ -589,6 +592,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
if (rctx->fini) {
digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf,
DCP_SHA_PAY_SZ, DMA_FROM_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, digest_phys);
+ if (ret)
+ goto done_run;
+
desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
desc->payload = digest_phys;
}
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 0dd4c6b157de..9b968ac4ee7b 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1175,9 +1175,9 @@ static int omap_aes_probe(struct platform_device *pdev)
spin_lock_init(&dd->lock);
INIT_LIST_HEAD(&dd->list);
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_add_tail(&dd->list, &dev_list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
/* Initialize crypto engine */
dd->engine = crypto_engine_alloc_init(dev, 1);
@@ -1264,9 +1264,9 @@ static int omap_aes_remove(struct platform_device *pdev)
if (!dd)
return -ENODEV;
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_del(&dd->list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c
index 31bdb1d76d11..a4cc6bf146ec 100644
--- a/drivers/crypto/omap-crypto.c
+++ b/drivers/crypto/omap-crypto.c
@@ -210,7 +210,7 @@ void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
buf = sg_virt(sg);
pages = get_order(len);
- if (orig && (flags & OMAP_CRYPTO_COPY_MASK))
+ if (orig && (flags & OMAP_CRYPTO_DATA_COPIED))
omap_crypto_copy_data(sg, orig, offset, len);
if (flags & OMAP_CRYPTO_DATA_COPIED)
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index bc8631363d72..be77656864e3 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -1033,9 +1033,9 @@ static int omap_des_probe(struct platform_device *pdev)
INIT_LIST_HEAD(&dd->list);
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_add_tail(&dd->list, &dev_list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
/* Initialize des crypto engine */
dd->engine = crypto_engine_alloc_init(dev, 1);
@@ -1094,9 +1094,9 @@ static int omap_des_remove(struct platform_device *pdev)
if (!dd)
return -ENODEV;
- spin_lock(&list_lock);
+ spin_lock_bh(&list_lock);
list_del(&dd->list);
- spin_unlock(&list_lock);
+ spin_unlock_bh(&list_lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index dd53ad9987b0..f6bf53c00b61 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -105,7 +105,6 @@
#define FLAGS_FINAL 1
#define FLAGS_DMA_ACTIVE 2
#define FLAGS_OUTPUT_READY 3
-#define FLAGS_INIT 4
#define FLAGS_CPU 5
#define FLAGS_DMA_READY 6
#define FLAGS_AUTO_XOR 7
@@ -368,24 +367,6 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req)
hash[i] = le32_to_cpup((__le32 *)in + i);
}
-static int omap_sham_hw_init(struct omap_sham_dev *dd)
-{
- int err;
-
- err = pm_runtime_resume_and_get(dd->dev);
- if (err < 0) {
- dev_err(dd->dev, "failed to get sync: %d\n", err);
- return err;
- }
-
- if (!test_bit(FLAGS_INIT, &dd->flags)) {
- set_bit(FLAGS_INIT, &dd->flags);
- dd->err = 0;
- }
-
- return 0;
-}
-
static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length,
int final, int dma)
{
@@ -1093,11 +1074,14 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq)
dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d",
ctx->op, ctx->total, ctx->digcnt, final);
- dd->req = req;
-
- err = omap_sham_hw_init(dd);
- if (err)
+ err = pm_runtime_resume_and_get(dd->dev);
+ if (err < 0) {
+ dev_err(dd->dev, "failed to get sync: %d\n", err);
return err;
+ }
+
+ dd->err = 0;
+ dd->req = req;
if (ctx->digcnt)
dd->pdata->copy_hash(req, 0);
@@ -1736,7 +1720,7 @@ static void omap_sham_done_task(unsigned long data)
if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags))
goto finish;
} else if (test_bit(FLAGS_DMA_READY, &dd->flags)) {
- if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
+ if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
omap_sham_update_dma_stop(dd);
if (dd->err) {
err = dd->err;
@@ -2129,7 +2113,6 @@ static int omap_sham_probe(struct platform_device *pdev)
dd->fallback_sz = OMAP_SHA_DMA_THRESHOLD;
pm_runtime_enable(dev);
- pm_runtime_irq_safe(dev);
err = pm_runtime_get_sync(dev);
if (err < 0) {
@@ -2144,9 +2127,9 @@ static int omap_sham_probe(struct platform_device *pdev)
(rev & dd->pdata->major_mask) >> dd->pdata->major_shift,
(rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
- spin_lock(&sham.lock);
+ spin_lock_bh(&sham.lock);
list_add_tail(&dd->list, &sham.dev_list);
- spin_unlock(&sham.lock);
+ spin_unlock_bh(&sham.lock);
dd->engine = crypto_engine_alloc_init(dev, 1);
if (!dd->engine) {
@@ -2194,10 +2177,11 @@ err_algs:
err_engine_start:
crypto_engine_exit(dd->engine);
err_engine:
- spin_lock(&sham.lock);
+ spin_lock_bh(&sham.lock);
list_del(&dd->list);
- spin_unlock(&sham.lock);
+ spin_unlock_bh(&sham.lock);
err_pm:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
if (!dd->polling_mode)
dma_release_channel(dd->dma_lch);
@@ -2215,9 +2199,9 @@ static int omap_sham_remove(struct platform_device *pdev)
dd = platform_get_drvdata(pdev);
if (!dd)
return -ENODEV;
- spin_lock(&sham.lock);
+ spin_lock_bh(&sham.lock);
list_del(&dd->list);
- spin_unlock(&sham.lock);
+ spin_unlock_bh(&sham.lock);
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
crypto_unregister_ahash(
@@ -2225,6 +2209,7 @@ static int omap_sham_remove(struct platform_device *pdev)
dd->pdata->algs_info[i].registered--;
}
tasklet_kill(&dd->done_task);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
pm_runtime_disable(&pdev->dev);
if (!dd->polling_mode)
@@ -2235,32 +2220,11 @@ static int omap_sham_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM_SLEEP
-static int omap_sham_suspend(struct device *dev)
-{
- pm_runtime_put_sync(dev);
- return 0;
-}
-
-static int omap_sham_resume(struct device *dev)
-{
- int err = pm_runtime_resume_and_get(dev);
- if (err < 0) {
- dev_err(dev, "failed to get sync: %d\n", err);
- return err;
- }
- return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume);
-
static struct platform_driver omap_sham_driver = {
.probe = omap_sham_probe,
.remove = omap_sham_remove,
.driver = {
.name = "omap-sham",
- .pm = &omap_sham_pm_ops,
.of_match_table = omap_sham_of_match,
},
};
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 3524ddd48930..33d8e50dcbda 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -161,7 +161,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
return 0;
}
@@ -210,21 +210,21 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
hw_data->fw_mmp_name = ADF_4XXX_MMP;
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK;
hw_data->uof_get_num_objs = uof_get_num_objs;
hw_data->uof_get_name = uof_get_name;
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->set_msix_rttable = set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index a8805c815d16..359fb7989dfb 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -221,16 +221,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* Set DMA identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration.\n");
- ret = -EFAULT;
- goto out_err;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration.\n");
+ goto out_err;
}
/* Get accelerator capabilities mask */
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index 1dd64af22bea..3027c01bc89e 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -111,11 +111,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C3XXX_PF2VF_OFFSET(i);
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C3XXX_VINTMSK_OFFSET(i);
-}
-
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@@ -159,8 +154,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_C3XXX_SMIA1_MASK);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
+ spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
+
return 0;
}
@@ -193,8 +190,6 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->get_sram_bar_id = get_sram_bar_id;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
- hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sku = get_sku;
@@ -203,16 +198,18 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
+ hw_data->get_pf2vf_offset = get_pf2vf_offset;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
+
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
index fece8e38025a..86ee02a86789 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
@@ -29,7 +29,6 @@
#define ADF_C3XXX_ERRSSMSH_EN BIT(3)
#define ADF_C3XXX_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
-#define ADF_C3XXX_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
index 7fb3343ae8b0..cc6e75dc60de 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
@@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C3XXX_DEVICE_NAME)) {
@@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
- goto out_err_free_reg;
+ goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
- goto out_err_free_reg;
+ goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
+out_err_disable_aer:
+ adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
index 15f6b9bdfb22..3e69b520e82f 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C3XXXIOV_PF2VF_OFFSET;
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C3XXXIOV_VINTMSK_OFFSET;
-}
-
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
- hw_data->send_admin_init = adf_vf2pf_init;
+ hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
- hw_data->disable_iov = adf_vf2pf_shutdown;
+ hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
- hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h
index 7945a9cd1c60..f5de4ce66014 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.h
@@ -13,7 +13,6 @@
#define ADF_C3XXXIOV_ETR_BAR 0
#define ADF_C3XXXIOV_ETR_MAX_BANKS 1
#define ADF_C3XXXIOV_PF2VF_OFFSET 0x200
-#define ADF_C3XXXIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
index 067ca5e17d38..1df1b868978d 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C3XXXVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
+ adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
index 30337390513c..b023c80873bb 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
@@ -113,11 +113,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C62X_PF2VF_OFFSET(i);
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C62X_VINTMSK_OFFSET(i);
-}
-
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@@ -161,8 +156,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_C62X_SMIA1_MASK);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
+ spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
+
return 0;
}
@@ -195,8 +192,6 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->get_sram_bar_id = get_sram_bar_id;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
- hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sku = get_sku;
@@ -205,16 +200,18 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_flr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
+ hw_data->get_pf2vf_offset = get_pf2vf_offset;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
+
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
index 53d3cb577f5b..e6664bd20c91 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
@@ -30,7 +30,6 @@
#define ADF_C62X_ERRSSMSH_EN BIT(3)
#define ADF_C62X_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
-#define ADF_C62X_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80
diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c
index 1f5de442e1e6..bf251dfe74b3 100644
--- a/drivers/crypto/qat/qat_c62x/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62x/adf_drv.c
@@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C62X_DEVICE_NAME)) {
@@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
- goto out_err_free_reg;
+ goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
- goto out_err_free_reg;
+ goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
+out_err_disable_aer:
+ adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
index d231583428c9..3bee3e467363 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_C62XIOV_PF2VF_OFFSET;
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_C62XIOV_VINTMSK_OFFSET;
-}
-
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
- hw_data->send_admin_init = adf_vf2pf_init;
+ hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
- hw_data->disable_iov = adf_vf2pf_shutdown;
+ hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
- hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
index a6c04cf7a43c..794778c48678 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.h
@@ -13,7 +13,6 @@
#define ADF_C62XIOV_ETR_BAR 0
#define ADF_C62XIOV_ETR_MAX_BANKS 1
#define ADF_C62XIOV_PF2VF_OFFSET 0x200
-#define ADF_C62XIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
index 51ea88c0b17d..8103bd81d617 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_C62XVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
+ adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index ac435b44f1d2..38c0af6d4e43 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -18,8 +18,6 @@
#define ADF_4XXX_DEVICE_NAME "4xxx"
#define ADF_4XXX_PCI_DEVICE_ID 0x4940
#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
-#define ADF_ERRSOU3 (0x3A000 + 0x0C)
-#define ADF_ERRSOU5 (0x3A000 + 0xD8)
#define ADF_DEVICE_FUSECTL_OFFSET 0x40
#define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
#define ADF_DEVICE_FUSECTL_MASK 0x80000000
@@ -156,7 +154,6 @@ struct adf_hw_device_data {
u32 (*get_num_aes)(struct adf_hw_device_data *self);
u32 (*get_num_accels)(struct adf_hw_device_data *self);
u32 (*get_pf2vf_offset)(u32 i);
- u32 (*get_vintmsk_offset)(u32 i);
void (*get_arb_info)(struct arb_info *arb_csrs_info);
void (*get_admin_info)(struct admin_info *admin_csrs_info);
enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self);
@@ -174,7 +171,7 @@ struct adf_hw_device_data {
bool enable);
void (*enable_ints)(struct adf_accel_dev *accel_dev);
void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev);
- int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev);
+ int (*enable_pfvf_comms)(struct adf_accel_dev *accel_dev);
void (*reset_device)(struct adf_accel_dev *accel_dev);
void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
char *(*uof_get_name)(u32 obj_num);
@@ -227,7 +224,6 @@ struct adf_fw_loader_data {
struct adf_accel_vf_info {
struct adf_accel_dev *accel_dev;
- struct tasklet_struct vf2pf_bh_tasklet;
struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */
struct ratelimit_state vf2pf_ratelimit;
u32 vf_nr;
@@ -249,6 +245,8 @@ struct adf_accel_dev {
struct adf_accel_pci accel_pci_dev;
union {
struct {
+ /* protects VF2PF interrupts access */
+ spinlock_t vf2pf_ints_lock;
/* vf_info is non-zero when SR-IOV is init'ed */
struct adf_accel_vf_info *vf_info;
} pf;
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index d2ae293d0df6..ed3e40bc56eb 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -194,7 +194,7 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev)
EXPORT_SYMBOL_GPL(adf_enable_aer);
/**
- * adf_disable_aer() - Enable Advance Error Reporting for acceleration device
+ * adf_disable_aer() - Disable Advance Error Reporting for acceleration device
* @accel_dev: Pointer to acceleration device.
*
* Function disables PCI Advance Error Reporting for the
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index c61476553728..4261749fae8d 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -193,22 +193,23 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
+void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev,
+ u32 vf_mask);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
+void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info);
-int adf_vf2pf_init(struct adf_accel_dev *accel_dev);
-void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev);
+int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev);
+void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev);
int adf_init_pf_wq(void);
void adf_exit_pf_wq(void);
int adf_init_vf_wq(void);
void adf_exit_vf_wq(void);
+void adf_flush_vf_wq(struct adf_accel_dev *accel_dev);
#else
-static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
-{
- return 0;
-}
+#define adf_sriov_configure NULL
static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
{
@@ -222,12 +223,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
}
-static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
+static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
{
return 0;
}
-static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
+static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
{
}
@@ -249,5 +250,9 @@ static inline void adf_exit_vf_wq(void)
{
}
+static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
+{
+}
+
#endif
#endif
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 744c40351428..60bc7b991d35 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -61,6 +61,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
struct service_hndl *service;
struct list_head *list_itr;
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ int ret;
if (!hw_data) {
dev_err(&GET_DEV(accel_dev),
@@ -88,8 +89,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
return -EFAULT;
}
- hw_data->enable_ints(accel_dev);
-
if (adf_ae_init(accel_dev)) {
dev_err(&GET_DEV(accel_dev),
"Failed to initialise Acceleration Engine\n");
@@ -110,6 +109,13 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
}
set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
+ hw_data->enable_ints(accel_dev);
+ hw_data->enable_error_correction(accel_dev);
+
+ ret = hw_data->enable_pfvf_comms(accel_dev);
+ if (ret)
+ return ret;
+
/*
* Subservice initialisation is divided into two stages: init and start.
* This is to facilitate any ordering dependencies between services
@@ -126,9 +132,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
set_bit(accel_dev->accel_id, service->init_status);
}
- hw_data->enable_error_correction(accel_dev);
- hw_data->enable_vf2pf_comms(accel_dev);
-
return 0;
}
EXPORT_SYMBOL_GPL(adf_dev_init);
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index e3ad5587be49..c678d5c531aa 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -15,6 +15,14 @@
#include "adf_transport_access_macros.h"
#include "adf_transport_internal.h"
+#define ADF_MAX_NUM_VFS 32
+#define ADF_ERRSOU3 (0x3A000 + 0x0C)
+#define ADF_ERRSOU5 (0x3A000 + 0xD8)
+#define ADF_ERRMSK3 (0x3A000 + 0x1C)
+#define ADF_ERRMSK5 (0x3A000 + 0xDC)
+#define ADF_ERR_REG_VF2PF_L(vf_src) (((vf_src) & 0x01FFFE00) >> 9)
+#define ADF_ERR_REG_VF2PF_U(vf_src) (((vf_src) & 0x0000FFFF) << 16)
+
static int adf_enable_msix(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@@ -71,14 +79,23 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
- void __iomem *pmisc_bar_addr = pmisc->virt_addr;
- u32 vf_mask;
+ void __iomem *pmisc_addr = pmisc->virt_addr;
+ u32 errsou3, errsou5, errmsk3, errmsk5;
+ unsigned long vf_mask;
/* Get the interrupt sources triggered by VFs */
- vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) &
- 0x0000FFFF) << 16) |
- ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU3) &
- 0x01FFFE00) >> 9);
+ errsou3 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU3);
+ errsou5 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU5);
+ vf_mask = ADF_ERR_REG_VF2PF_L(errsou3);
+ vf_mask |= ADF_ERR_REG_VF2PF_U(errsou5);
+
+ /* To avoid adding duplicate entries to work queue, clear
+ * vf_int_mask_sets bits that are already masked in ERRMSK register.
+ */
+ errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK3);
+ errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK5);
+ vf_mask &= ~ADF_ERR_REG_VF2PF_L(errmsk3);
+ vf_mask &= ~ADF_ERR_REG_VF2PF_U(errmsk5);
if (vf_mask) {
struct adf_accel_vf_info *vf_info;
@@ -86,15 +103,13 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
int i;
/* Disable VF2PF interrupts for VFs with pending ints */
- adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+ adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask);
/*
- * Schedule tasklets to handle VF2PF interrupt BHs
- * unless the VF is malicious and is attempting to
- * flood the host OS with VF2PF interrupts.
+ * Handle VF2PF interrupt unless the VF is malicious and
+ * is attempting to flood the host OS with VF2PF interrupts.
*/
- for_each_set_bit(i, (const unsigned long *)&vf_mask,
- (sizeof(vf_mask) * BITS_PER_BYTE)) {
+ for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) {
vf_info = accel_dev->pf.vf_info + i;
if (!__ratelimit(&vf_info->vf2pf_ratelimit)) {
@@ -104,8 +119,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
continue;
}
- /* Tasklet will re-enable ints from this VF */
- tasklet_hi_schedule(&vf_info->vf2pf_bh_tasklet);
+ adf_schedule_vf2pf_handler(vf_info);
irq_handled = true;
}
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
index a1b77bd7a894..976b9ab7617c 100644
--- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c
@@ -11,28 +11,8 @@
#define ADF_DH895XCC_ERRMSK5 (ADF_DH895XCC_EP_OFFSET + 0xDC)
#define ADF_DH895XCC_ERRMSK5_VF2PF_U_MASK(vf_mask) (vf_mask >> 16)
-void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
- struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
- struct adf_hw_device_data *hw_data = accel_dev->hw_device;
- void __iomem *pmisc_bar_addr =
- pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
-
- ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0);
-}
-
-void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
- struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
- struct adf_hw_device_data *hw_data = accel_dev->hw_device;
- void __iomem *pmisc_bar_addr =
- pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
-
- ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2);
-}
-
-void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
- u32 vf_mask)
+static void __adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
+ u32 vf_mask)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
@@ -55,7 +35,17 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
}
}
-void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+ __adf_enable_vf2pf_interrupts(accel_dev, vf_mask);
+ spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
+}
+
+static void __adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
+ u32 vf_mask)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *pmisc =
@@ -78,6 +68,22 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
}
}
+void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+ __adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+ spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
+}
+
+void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, u32 vf_mask)
+{
+ spin_lock(&accel_dev->pf.vf2pf_ints_lock);
+ __adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
+ spin_unlock(&accel_dev->pf.vf2pf_ints_lock);
+}
+
static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
{
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
@@ -186,7 +192,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
return ret;
}
-EXPORT_SYMBOL_GPL(adf_iov_putmsg);
void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
{
@@ -216,7 +221,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
ADF_PF2VF_MSGTYPE_SHIFT) |
- (ADF_PFVF_COMPATIBILITY_VERSION <<
+ (ADF_PFVF_COMPAT_THIS_VERSION <<
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
dev_dbg(&GET_DEV(accel_dev),
@@ -226,19 +231,19 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
if (vf_compat_ver < hw_data->min_iov_compat_ver) {
dev_err(&GET_DEV(accel_dev),
"VF (vers %d) incompatible with PF (vers %d)\n",
- vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+ vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_INCOMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
- } else if (vf_compat_ver > ADF_PFVF_COMPATIBILITY_VERSION) {
+ } else if (vf_compat_ver > ADF_PFVF_COMPAT_THIS_VERSION) {
dev_err(&GET_DEV(accel_dev),
"VF (vers %d) compat with PF (vers %d) unkn.\n",
- vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+ vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_COMPAT_UNKNOWN <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
} else {
dev_dbg(&GET_DEV(accel_dev),
"VF (vers %d) compatible with PF (vers %d)\n",
- vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
+ vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
resp |= ADF_PF2VF_VF_COMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
}
@@ -251,7 +256,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
ADF_PF2VF_MSGTYPE_SHIFT) |
- (ADF_PFVF_COMPATIBILITY_VERSION <<
+ (ADF_PFVF_COMPAT_THIS_VERSION <<
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
resp |= ADF_PF2VF_VF_COMPATIBLE <<
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
@@ -284,6 +289,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
/* re-enable interrupt on PF from this VF */
adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr));
+
return;
err:
dev_dbg(&GET_DEV(accel_dev), "Unknown message from VF%d (0x%x);\n",
@@ -313,8 +319,10 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
msg = ADF_VF2PF_MSGORIGIN_SYSTEM;
msg |= ADF_VF2PF_MSGTYPE_COMPAT_VER_REQ << ADF_VF2PF_MSGTYPE_SHIFT;
- msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
- BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255);
+ msg |= ADF_PFVF_COMPAT_THIS_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
+ BUILD_BUG_ON(ADF_PFVF_COMPAT_THIS_VERSION > 255);
+
+ reinit_completion(&accel_dev->vf.iov_msg_completion);
/* Send request from VF to PF */
ret = adf_iov_putmsg(accel_dev, msg, 0);
@@ -338,14 +346,16 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
break;
case ADF_PF2VF_VF_COMPAT_UNKNOWN:
/* VF is newer than PF and decides whether it is compatible */
- if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver)
+ if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver) {
+ accel_dev->vf.compatible = ADF_PF2VF_VF_COMPATIBLE;
break;
+ }
fallthrough;
case ADF_PF2VF_VF_INCOMPATIBLE:
dev_err(&GET_DEV(accel_dev),
"PF (vers %d) and VF (vers %d) are not compatible\n",
accel_dev->vf.pf_version,
- ADF_PFVF_COMPATIBILITY_VERSION);
+ ADF_PFVF_COMPAT_THIS_VERSION);
return -EINVAL;
default:
dev_err(&GET_DEV(accel_dev),
diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
index 0690c031bfce..ffd43aa50b57 100644
--- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
+++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.h
@@ -52,7 +52,7 @@
* IN_USE_BY pattern as part of a collision control scheme (see adf_iov_putmsg).
*/
-#define ADF_PFVF_COMPATIBILITY_VERSION 0x1 /* PF<->VF compat */
+#define ADF_PFVF_COMPAT_THIS_VERSION 0x1 /* PF<->VF compat */
/* PF->VF messages */
#define ADF_PF2VF_INT BIT(0)
diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c
index 8c822c2861c2..90ec057f9183 100644
--- a/drivers/crypto/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/qat/qat_common/adf_sriov.c
@@ -24,9 +24,8 @@ static void adf_iov_send_resp(struct work_struct *work)
kfree(pf2vf_resp);
}
-static void adf_vf2pf_bh_handler(void *data)
+void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info)
{
- struct adf_accel_vf_info *vf_info = (struct adf_accel_vf_info *)data;
struct adf_pf2vf_resp *pf2vf_resp;
pf2vf_resp = kzalloc(sizeof(*pf2vf_resp), GFP_ATOMIC);
@@ -52,9 +51,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
vf_info->accel_dev = accel_dev;
vf_info->vf_nr = i;
- tasklet_init(&vf_info->vf2pf_bh_tasklet,
- (void *)adf_vf2pf_bh_handler,
- (unsigned long)vf_info);
mutex_init(&vf_info->pf2vf_lock);
ratelimit_state_init(&vf_info->vf2pf_ratelimit,
DEFAULT_RATELIMIT_INTERVAL,
@@ -110,8 +106,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
hw_data->configure_iov_threads(accel_dev, false);
for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
- tasklet_disable(&vf->vf2pf_bh_tasklet);
- tasklet_kill(&vf->vf2pf_bh_tasklet);
mutex_destroy(&vf->pf2vf_lock);
}
diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
index e85bd62d134a..3e25fac051b2 100644
--- a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c
@@ -5,14 +5,14 @@
#include "adf_pf2vf_msg.h"
/**
- * adf_vf2pf_init() - send init msg to PF
+ * adf_vf2pf_notify_init() - send init msg to PF
* @accel_dev: Pointer to acceleration VF device.
*
* Function sends an init message from the VF to a PF
*
* Return: 0 on success, error code otherwise.
*/
-int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
+int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
{
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
(ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT));
@@ -25,17 +25,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
return 0;
}
-EXPORT_SYMBOL_GPL(adf_vf2pf_init);
+EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init);
/**
- * adf_vf2pf_shutdown() - send shutdown msg to PF
+ * adf_vf2pf_notify_shutdown() - send shutdown msg to PF
* @accel_dev: Pointer to acceleration VF device.
*
* Function sends a shutdown message from the VF to a PF
*
* Return: void
*/
-void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
+void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
{
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
(ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT));
@@ -45,4 +45,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
dev_err(&GET_DEV(accel_dev),
"Failed to send Shutdown event to PF\n");
}
-EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown);
+EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown);
diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c
index 888388acb6bd..7828a6573f3e 100644
--- a/drivers/crypto/qat/qat_common/adf_vf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c
@@ -18,6 +18,7 @@
#include "adf_pf2vf_msg.h"
#define ADF_VINTSOU_OFFSET 0x204
+#define ADF_VINTMSK_OFFSET 0x208
#define ADF_VINTSOU_BUN BIT(0)
#define ADF_VINTSOU_PF2VF BIT(1)
@@ -28,6 +29,27 @@ struct adf_vf_stop_data {
struct work_struct work;
};
+void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
+{
+ struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ void __iomem *pmisc_bar_addr =
+ pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
+
+ ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x0);
+}
+
+void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
+{
+ struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ void __iomem *pmisc_bar_addr =
+ pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
+
+ ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x2);
+}
+EXPORT_SYMBOL_GPL(adf_disable_pf2vf_interrupts);
+
static int adf_enable_msi(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
@@ -160,11 +182,21 @@ static irqreturn_t adf_isr(int irq, void *privdata)
struct adf_bar *pmisc =
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
void __iomem *pmisc_bar_addr = pmisc->virt_addr;
- u32 v_int;
+ bool handled = false;
+ u32 v_int, v_mask;
/* Read VF INT source CSR to determine the source of VF interrupt */
v_int = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTSOU_OFFSET);
+ /* Read VF INT mask CSR to determine which sources are masked */
+ v_mask = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTMSK_OFFSET);
+
+ /*
+ * Recompute v_int ignoring sources that are masked. This is to
+ * avoid rescheduling the tasklet for interrupts already handled
+ */
+ v_int &= ~v_mask;
+
/* Check for PF2VF interrupt */
if (v_int & ADF_VINTSOU_PF2VF) {
/* Disable PF to VF interrupt */
@@ -172,7 +204,7 @@ static irqreturn_t adf_isr(int irq, void *privdata)
/* Schedule tasklet to handle interrupt BH */
tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet);
- return IRQ_HANDLED;
+ handled = true;
}
/* Check bundle interrupt */
@@ -184,10 +216,10 @@ static irqreturn_t adf_isr(int irq, void *privdata)
csr_ops->write_csr_int_flag_and_col(bank->csr_addr,
bank->bank_number, 0);
tasklet_hi_schedule(&bank->resp_handler);
- return IRQ_HANDLED;
+ handled = true;
}
- return IRQ_NONE;
+ return handled ? IRQ_HANDLED : IRQ_NONE;
}
static int adf_request_msi_irq(struct adf_accel_dev *accel_dev)
@@ -285,6 +317,30 @@ err_out:
}
EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc);
+/**
+ * adf_flush_vf_wq() - Flush workqueue for VF
+ * @accel_dev: Pointer to acceleration device.
+ *
+ * Function disables the PF/VF interrupts on the VF so that no new messages
+ * are received and flushes the workqueue 'adf_vf_stop_wq'.
+ *
+ * Return: void.
+ */
+void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
+{
+ adf_disable_pf2vf_interrupts(accel_dev);
+
+ flush_workqueue(adf_vf_stop_wq);
+}
+EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
+
+/**
+ * adf_init_vf_wq() - Init workqueue for VF
+ *
+ * Function init workqueue 'adf_vf_stop_wq' for VF.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
int __init adf_init_vf_wq(void)
{
adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0);
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 7dd7cd6c3ef8..0a9ce365a544 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -131,11 +131,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_DH895XCC_PF2VF_OFFSET(i);
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_DH895XCC_VINTMSK_OFFSET(i);
-}
-
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
@@ -180,8 +175,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
ADF_DH895XCC_SMIA1_MASK);
}
-static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
+static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
{
+ spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
+
return 0;
}
@@ -213,8 +210,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
hw_data->get_num_aes = get_num_aes;
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
- hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_admin_info = adf_gen2_get_admin_info;
hw_data->get_arb_info = adf_gen2_get_arb_info;
hw_data->get_sram_bar_id = get_sram_bar_id;
@@ -224,15 +219,17 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
hw_data->init_admin_comms = adf_init_admin_comms;
hw_data->exit_admin_comms = adf_exit_admin_comms;
hw_data->configure_iov_threads = configure_iov_threads;
- hw_data->disable_iov = adf_disable_sriov;
hw_data->send_admin_init = adf_send_admin_init;
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
- hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
hw_data->reset_device = adf_reset_sbr;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->get_pf2vf_offset = get_pf2vf_offset;
+ hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
+ hw_data->disable_iov = adf_disable_sriov;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
+
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
index 4d613923d155..f99319cd4543 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
@@ -35,7 +35,6 @@
#define ADF_DH895XCC_ERRSSMSH_EN BIT(3)
#define ADF_DH895XCC_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
-#define ADF_DH895XCC_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
/* AE to function mapping */
#define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index a9ec4357144c..3976a81bd99b 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_DH895XCC_DEVICE_NAME)) {
@@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_save_state(pdev)) {
dev_err(&pdev->dev, "Failed to save pci state\n");
ret = -ENOMEM;
- goto out_err_free_reg;
+ goto out_err_disable_aer;
}
ret = qat_crypto_dev_config(accel_dev);
if (ret)
- goto out_err_free_reg;
+ goto out_err_disable_aer;
ret = adf_dev_init(accel_dev);
if (ret)
@@ -229,6 +222,8 @@ out_err_dev_stop:
adf_dev_stop(accel_dev);
out_err_dev_shutdown:
adf_dev_shutdown(accel_dev);
+out_err_disable_aer:
+ adf_disable_aer(accel_dev);
out_err_free_reg:
pci_release_regions(accel_pci_dev->pci_dev);
out_err_disable:
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
index f14fb82ed6df..7c6ed6bc8abf 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
@@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
return ADF_DH895XCCIOV_PF2VF_OFFSET;
}
-static u32 get_vintmsk_offset(u32 i)
-{
- return ADF_DH895XCCIOV_VINTMSK_OFFSET;
-}
-
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
{
return 0;
@@ -81,10 +76,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
hw_data->enable_error_correction = adf_vf_void_noop;
hw_data->init_admin_comms = adf_vf_int_noop;
hw_data->exit_admin_comms = adf_vf_void_noop;
- hw_data->send_admin_init = adf_vf2pf_init;
+ hw_data->send_admin_init = adf_vf2pf_notify_init;
hw_data->init_arb = adf_vf_int_noop;
hw_data->exit_arb = adf_vf_void_noop;
- hw_data->disable_iov = adf_vf2pf_shutdown;
+ hw_data->disable_iov = adf_vf2pf_notify_shutdown;
hw_data->get_accel_mask = get_accel_mask;
hw_data->get_ae_mask = get_ae_mask;
hw_data->get_num_accels = get_num_accels;
@@ -92,11 +87,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
hw_data->get_etr_bar_id = get_etr_bar_id;
hw_data->get_misc_bar_id = get_misc_bar_id;
hw_data->get_pf2vf_offset = get_pf2vf_offset;
- hw_data->get_vintmsk_offset = get_vintmsk_offset;
hw_data->get_sku = get_sku;
hw_data->enable_ints = adf_vf_void_noop;
- hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
- hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
+ hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
+ hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
hw_data->dev_class->instances++;
adf_devmgr_update_class_index(hw_data);
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
index 2bfcc67f8f39..306ebb71a408 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.h
@@ -13,7 +13,6 @@
#define ADF_DH895XCCIOV_ETR_BAR 0
#define ADF_DH895XCCIOV_ETR_MAX_BANKS 1
#define ADF_DH895XCCIOV_PF2VF_OFFSET 0x200
-#define ADF_DH895XCCIOV_VINTMSK_OFFSET 0x208
void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
index 29999da716cc..99d90f3ea2b7 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
@@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* set dma identifier */
- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
- if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
- dev_err(&pdev->dev, "No usable DMA configuration\n");
- ret = -EFAULT;
- goto out_err_disable;
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- }
-
- } else {
- pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto out_err_disable;
}
if (pci_request_regions(pdev, ADF_DH895XCCVF_DEVICE_NAME)) {
@@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
pr_err("QAT: Driver removal failed\n");
return;
}
+ adf_flush_vf_wq(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
adf_cleanup_accel(accel_dev);
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 080955a1dd9c..e2375d992308 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -187,9 +187,9 @@ static int virtcrypto_init_vqs(struct virtio_crypto *vi)
if (ret)
goto err_free;
- get_online_cpus();
+ cpus_read_lock();
virtcrypto_set_affinity(vi);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index 9f937b125ab0..60ccf3e90d7d 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/arm-smccc.h>
#include <linux/kernel.h>
+#include <linux/platform_device.h>
#include <asm/archrandom.h>
static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
@@ -42,3 +43,19 @@ u32 arm_smccc_get_version(void)
return smccc_version;
}
EXPORT_SYMBOL_GPL(arm_smccc_get_version);
+
+static int __init smccc_devices_init(void)
+{
+ struct platform_device *pdev;
+
+ if (smccc_trng_available) {
+ pdev = platform_device_register_simple("smccc_trng", -1,
+ NULL, 0);
+ if (IS_ERR(pdev))
+ pr_err("smccc_trng: could not register device: %ld\n",
+ PTR_ERR(pdev));
+ }
+
+ return 0;
+}
+device_initcall(smccc_devices_init);
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index 7afd730d16ff..709f286e7b25 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -3,6 +3,7 @@
/*
* Common values for the SM4 algorithm
* Copyright (C) 2018 ARM Limited or its affiliates.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#ifndef _CRYPTO_SM4_H
@@ -15,17 +16,29 @@
#define SM4_BLOCK_SIZE 16
#define SM4_RKEY_WORDS 32
-struct crypto_sm4_ctx {
+struct sm4_ctx {
u32 rkey_enc[SM4_RKEY_WORDS];
u32 rkey_dec[SM4_RKEY_WORDS];
};
-int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len);
-int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
+/**
+ * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx: The location where the computed key will be stored.
+ * @in_key: The supplied key.
+ * @key_len: The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
unsigned int key_len);
-void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+/**
+ * sm4_crypt_block - Encrypt or decrypt a single SM4 block
+ * @rk: The rkey_enc for encrypt or rkey_dec for decrypt
+ * @out: Buffer to store output data
+ * @in: Buffer containing the input data
+ */
+void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in);
#endif
diff --git a/include/linux/padata.h b/include/linux/padata.h
index a433f13fc4bf..495b16b6b4d7 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -12,6 +12,7 @@
#ifndef PADATA_H
#define PADATA_H
+#include <linux/refcount.h>
#include <linux/compiler_types.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
@@ -96,7 +97,7 @@ struct parallel_data {
struct padata_shell *ps;
struct padata_list __percpu *reorder_list;
struct padata_serial_queue __percpu *squeue;
- atomic_t refcnt;
+ refcount_t refcnt;
unsigned int seq_nr;
unsigned int processed;
int cpu;
diff --git a/kernel/padata.c b/kernel/padata.c
index d4d3ba6e1728..18d3a5c699d8 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -9,19 +9,6 @@
*
* Copyright (c) 2020 Oracle and/or its affiliates.
* Author: Daniel Jordan <daniel.m.jordan@oracle.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/completion.h>
@@ -211,7 +198,7 @@ int padata_do_parallel(struct padata_shell *ps,
if ((pinst->flags & PADATA_RESET))
goto out;
- atomic_inc(&pd->refcnt);
+ refcount_inc(&pd->refcnt);
padata->pd = pd;
padata->cb_cpu = *cb_cpu;
@@ -383,7 +370,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
}
local_bh_enable();
- if (atomic_sub_and_test(cnt, &pd->refcnt))
+ if (refcount_sub_and_test(cnt, &pd->refcnt))
padata_free_pd(pd);
}
@@ -593,7 +580,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
padata_init_reorder_list(pd);
padata_init_squeues(pd);
pd->seq_nr = -1;
- atomic_set(&pd->refcnt, 1);
+ refcount_set(&pd->refcnt, 1);
spin_lock_init(&pd->lock);
pd->cpu = cpumask_first(pd->cpumask.pcpu);
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -667,7 +654,7 @@ static int padata_replace(struct padata_instance *pinst)
synchronize_rcu();
list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
- if (atomic_dec_and_test(&ps->opd->refcnt))
+ if (refcount_dec_and_test(&ps->opd->refcnt))
padata_free_pd(ps->opd);
pinst->flags &= ~PADATA_RESET;
@@ -733,7 +720,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
struct cpumask *serial_mask, *parallel_mask;
int err = -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&pinst->lock);
switch (cpumask_type) {
@@ -753,7 +740,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
out:
mutex_unlock(&pinst->lock);
- put_online_cpus();
+ cpus_read_unlock();
return err;
}
@@ -992,7 +979,7 @@ struct padata_instance *padata_alloc(const char *name)
if (!pinst->parallel_wq)
goto err_free_inst;
- get_online_cpus();
+ cpus_read_lock();
pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
WQ_CPU_INTENSIVE, 1, name);
@@ -1026,7 +1013,7 @@ struct padata_instance *padata_alloc(const char *name)
&pinst->cpu_dead_node);
#endif
- put_online_cpus();
+ cpus_read_unlock();
return pinst;
@@ -1036,7 +1023,7 @@ err_free_masks:
err_free_serial_wq:
destroy_workqueue(pinst->serial_wq);
err_put_cpus:
- put_online_cpus();
+ cpus_read_unlock();
destroy_workqueue(pinst->parallel_wq);
err_free_inst:
kfree(pinst);
@@ -1074,9 +1061,9 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
ps->pinst = pinst;
- get_online_cpus();
+ cpus_read_lock();
pd = padata_alloc_pd(ps);
- put_online_cpus();
+ cpus_read_unlock();
if (!pd)
goto out_free_ps;
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 14c032de276e..545ccbddf6a1 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -128,3 +128,6 @@ config CRYPTO_LIB_CHACHA20POLY1305
config CRYPTO_LIB_SHA256
tristate
+
+config CRYPTO_LIB_SM4
+ tristate
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 3a435629d9ce..73205ed269ba 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -38,6 +38,9 @@ libpoly1305-y += poly1305.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
+obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o
+libsm4-y := sm4.o
+
ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
libblake2s-y += blake2s-selftest.o
libchacha20poly1305-y += chacha20poly1305-selftest.o
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index c64ac8bfb6a9..4055aa593ec4 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -73,7 +73,7 @@ void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
}
EXPORT_SYMBOL(blake2s256_hmac);
-static int __init mod_init(void)
+static int __init blake2s_mod_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!blake2s_selftest()))
@@ -81,12 +81,12 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit blake2s_mod_exit(void)
{
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index c2fcdb98cc02..fa6a9440fc95 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -354,7 +354,7 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len
}
EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
-static int __init mod_init(void)
+static int __init chacha20poly1305_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!chacha20poly1305_selftest()))
@@ -362,12 +362,12 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit chacha20poly1305_exit(void)
{
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(chacha20poly1305_init);
+module_exit(chacha20poly1305_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
index fb29739e8c29..064b352c6907 100644
--- a/lib/crypto/curve25519.c
+++ b/lib/crypto/curve25519.c
@@ -13,7 +13,7 @@
#include <linux/module.h>
#include <linux/init.h>
-static int __init mod_init(void)
+static int __init curve25519_init(void)
{
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
WARN_ON(!curve25519_selftest()))
@@ -21,12 +21,12 @@ static int __init mod_init(void)
return 0;
}
-static void __exit mod_exit(void)
+static void __exit curve25519_exit(void)
{
}
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(curve25519_init);
+module_exit(curve25519_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Curve25519 scalar multiplication");
diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c
new file mode 100644
index 000000000000..633b59fed9db
--- /dev/null
+++ b/lib/crypto/sm4.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4, as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm4.h>
+
+static const u32 fk[4] = {
+ 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const u32 __cacheline_aligned ck[32] = {
+ 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+ 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+ 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+ 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+ 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+ 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+ 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+ 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static const u8 __cacheline_aligned sbox[256] = {
+ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
+static inline u32 sm4_t_non_lin_sub(u32 x)
+{
+ u32 out;
+
+ out = (u32)sbox[x & 0xff];
+ out |= (u32)sbox[(x >> 8) & 0xff] << 8;
+ out |= (u32)sbox[(x >> 16) & 0xff] << 16;
+ out |= (u32)sbox[(x >> 24) & 0xff] << 24;
+
+ return out;
+}
+
+static inline u32 sm4_key_lin_sub(u32 x)
+{
+ return x ^ rol32(x, 13) ^ rol32(x, 23);
+}
+
+static inline u32 sm4_enc_lin_sub(u32 x)
+{
+ return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
+}
+
+static inline u32 sm4_key_sub(u32 x)
+{
+ return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_enc_sub(u32 x)
+{
+ return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
+{
+ return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
+}
+
+
+/**
+ * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx: The location where the computed key will be stored.
+ * @in_key: The supplied key.
+ * @key_len: The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
+ unsigned int key_len)
+{
+ u32 rk[4];
+ const u32 *key = (u32 *)in_key;
+ int i;
+
+ if (key_len != SM4_KEY_SIZE)
+ return -EINVAL;
+
+ rk[0] = get_unaligned_be32(&key[0]) ^ fk[0];
+ rk[1] = get_unaligned_be32(&key[1]) ^ fk[1];
+ rk[2] = get_unaligned_be32(&key[2]) ^ fk[2];
+ rk[3] = get_unaligned_be32(&key[3]) ^ fk[3];
+
+ for (i = 0; i < 32; i += 4) {
+ rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
+ rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
+ rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
+ rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
+
+ ctx->rkey_enc[i + 0] = rk[0];
+ ctx->rkey_enc[i + 1] = rk[1];
+ ctx->rkey_enc[i + 2] = rk[2];
+ ctx->rkey_enc[i + 3] = rk[3];
+ ctx->rkey_dec[31 - 0 - i] = rk[0];
+ ctx->rkey_dec[31 - 1 - i] = rk[1];
+ ctx->rkey_dec[31 - 2 - i] = rk[2];
+ ctx->rkey_dec[31 - 3 - i] = rk[3];
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sm4_expandkey);
+
+/**
+ * sm4_crypt_block - Encrypt or decrypt a single SM4 block
+ * @rk: The rkey_enc for encrypt or rkey_dec for decrypt
+ * @out: Buffer to store output data
+ * @in: Buffer containing the input data
+ */
+void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
+{
+ u32 x[4], i;
+
+ x[0] = get_unaligned_be32(in + 0 * 4);
+ x[1] = get_unaligned_be32(in + 1 * 4);
+ x[2] = get_unaligned_be32(in + 2 * 4);
+ x[3] = get_unaligned_be32(in + 3 * 4);
+
+ for (i = 0; i < 32; i += 4) {
+ x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
+ x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
+ x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
+ x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
+ }
+
+ put_unaligned_be32(x[3 - 0], out + 0 * 4);
+ put_unaligned_be32(x[3 - 1], out + 1 * 4);
+ put_unaligned_be32(x[3 - 2], out + 2 * 4);
+ put_unaligned_be32(x[3 - 3], out + 3 * 4);
+}
+EXPORT_SYMBOL_GPL(sm4_crypt_block);
+
+MODULE_DESCRIPTION("Generic SM4 library");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 9a75ca3f7edf..bc81419f400c 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -148,7 +148,7 @@ int mpi_resize(MPI a, unsigned nlimbs)
return 0; /* no need to do it */
if (a->d) {
- p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
+ p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
if (!p)
return -ENOMEM;
memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));