diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 15:03:00 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 15:03:00 -0800 |
commit | 94a855111ed9106971ca2617c5d075269e6aefde (patch) | |
tree | 330c762a403cf70c2cdbf12e7394e5e7c2a69a79 /arch/x86/crypto | |
parent | 93761c93e9da28d8a020777cee2a84133082b477 (diff) | |
parent | f1a033cc6b9eb6d80322008422df3c87aa5d47a0 (diff) |
Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 core updates from Borislav Petkov:
- Add the call depth tracking mitigation for Retbleed, which has been
long in the making. It is a lighter-weight, software-only fix for
Skylake-based cores where enabling IBRS is a big hammer and causes a
significant performance impact.
What it basically does is align all kernel functions to a 16-byte
boundary and add 16 bytes of padding before each function. objtool
collects all functions' locations and, when the mitigation gets
applied, it patches in a call accounting thunk which is used to track
the call depth of the stack at any time.
When that call depth reaches a magical, microarchitecture-specific
value for the Return Stack Buffer, the code stuffs that RSB and
avoids its underflow which could otherwise lead to the Intel variant
of Retbleed.
This software-only solution brings a lot of the lost performance
back, as benchmarks suggest:
https://lore.kernel.org/all/20220915111039.092790446@infradead.org/
The page above also contains a much more detailed explanation of the
whole mechanism.
- Implement a new control flow integrity scheme called FineIBT which is
based on the software kCFI implementation and uses hardware IBT
support where present to annotate and track indirect branches using a
hash to validate them
- Other misc fixes and cleanups
* tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits)
x86/paravirt: Use common macro for creating simple asm paravirt functions
x86/paravirt: Remove clobber bitmask from .parainstructions
x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al
x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit
x86/Kconfig: Enable kernel IBT by default
x86,pm: Force out-of-line memcpy()
objtool: Fix weak hole vs prefix symbol
objtool: Optimize elf_dirty_reloc_sym()
x86/cfi: Add boot time hash randomization
x86/cfi: Boot time selection of CFI scheme
x86/ibt: Implement FineIBT
objtool: Add --cfi to generate the .cfi_sites section
x86: Add prefix symbols for function padding
objtool: Add option to generate prefix symbols
objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf
objtool: Slice up elf_create_section_symbol()
kallsyms: Revert "Take callthunks into account"
x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces
x86/retpoline: Fix crash printing warning
x86/paravirt: Fix a !PARAVIRT build warning
...
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/camellia-aesni-avx-asm_64.S | 2 | ||||
-rw-r--r-- | arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 4 | ||||
-rw-r--r-- | arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 2 | ||||
-rw-r--r-- | arch/x86/crypto/crct10dif-pcl-asm_64.S | 1 | ||||
-rw-r--r-- | arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 1 | ||||
-rw-r--r-- | arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 2 | ||||
-rw-r--r-- | arch/x86/crypto/serpent-avx2-asm_64.S | 2 | ||||
-rw-r--r-- | arch/x86/crypto/sha1_ni_asm.S | 1 | ||||
-rw-r--r-- | arch/x86/crypto/sha256-avx-asm.S | 1 | ||||
-rw-r--r-- | arch/x86/crypto/sha256-avx2-asm.S | 1 | ||||
-rw-r--r-- | arch/x86/crypto/sha256-ssse3-asm.S | 1 | ||||
-rw-r--r-- | arch/x86/crypto/sha256_ni_asm.S | 1 | ||||
-rw-r--r-- | arch/x86/crypto/sm3-avx-asm_64.S | 1 | ||||
-rw-r--r-- | arch/x86/crypto/sm4-aesni-avx-asm_64.S | 7 | ||||
-rw-r--r-- | arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 6 | ||||
-rw-r--r-- | arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 2 |
16 files changed, 0 insertions, 35 deletions
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2e1658ddbe1a..4a30618281ec 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text -.align 8 SYM_FUNC_START_LOCAL(__camellia_enc_blk16) /* input: * %rdi: ctx, CTX @@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16) jmp .Lenc_done; SYM_FUNC_END(__camellia_enc_blk16) -.align 8 SYM_FUNC_START_LOCAL(__camellia_dec_blk16) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 0e4e9abbf4de..deaf62aa73a6 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -221,7 +221,6 @@ * Size optimization... with inlined roundsm32 binary would be over 5 times * larger and would only marginally faster. 
*/ -.align 8 SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, @@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c RET; SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) -.align 8 SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, @@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text -.align 8 SYM_FUNC_START_LOCAL(__camellia_enc_blk32) /* input: * %rdi: ctx, CTX @@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32) jmp .Lenc_done; SYM_FUNC_END(__camellia_enc_blk32) -.align 8 SYM_FUNC_START_LOCAL(__camellia_dec_blk32) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index b258af420c92..0326a01503c3 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -208,7 +208,6 @@ .text -.align 16 SYM_FUNC_START_LOCAL(__cast5_enc_blk16) /* input: * %rdi: ctx @@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) RET; SYM_FUNC_END(__cast5_enc_blk16) -.align 16 SYM_FUNC_START_LOCAL(__cast5_dec_blk16) /* input: * %rdi: ctx diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 721474abfb71..5286db5b8165 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -94,7 +94,6 @@ # # Assumes len >= 16. 
# -.align 16 SYM_FUNC_START(crc_t10dif_pcl) movdqa .Lbswap_mask(%rip), BSWAP_MASK diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 2077ce7a5647..b9abcd79c1f4 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -108,7 +108,6 @@ if (!$kernel) { sub declare_function() { my ($name, $align, $nargs) = @_; if($kernel) { - $code .= ".align $align\n"; $code .= "SYM_FUNC_START($name)\n"; $code .= ".L$name:\n"; } else { diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 82f2313f512b..97e283621851 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -550,7 +550,6 @@ #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -.align 8 SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) /* input: * %rdi: ctx, CTX @@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) RET; SYM_FUNC_END(__serpent_enc_blk8_avx) -.align 8 SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 8ea34c9b9316..6d60c50593a9 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -550,7 +550,6 @@ #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -.align 8 SYM_FUNC_START_LOCAL(__serpent_enc_blk16) /* input: * %rdi: ctx, CTX @@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16) RET; SYM_FUNC_END(__serpent_enc_blk16) -.align 8 SYM_FUNC_START_LOCAL(__serpent_dec_blk16) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 3cae5a1bb3d6..cade913d4882 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -93,7 +93,6 @@ * numBlocks: Number of blocks to 
process */ .text -.align 32 SYM_TYPED_FUNC_START(sha1_ni_transform) push %rbp mov %rsp, %rbp diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 06ea30c20828..5555b5d5215a 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -348,7 +348,6 @@ a = TMP_ ######################################################################## .text SYM_TYPED_FUNC_START(sha256_transform_avx) -.align 32 pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 2d2be531a11e..3eada9416852 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -525,7 +525,6 @@ STACK_SIZE = _CTX + _CTX_SIZE ######################################################################## .text SYM_TYPED_FUNC_START(sha256_transform_rorx) -.align 32 pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 7db28839108d..959288eecc68 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -357,7 +357,6 @@ a = TMP_ ######################################################################## .text SYM_TYPED_FUNC_START(sha256_transform_ssse3) -.align 32 pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 47f93937f798..537b6dcd7ed8 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -97,7 +97,6 @@ */ .text -.align 32 SYM_TYPED_FUNC_START(sha256_ni_transform) shl $6, NUM_BLKS /* convert to bytes */ diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S index 8fc5ac681fd6..503bab450a91 100644 --- a/arch/x86/crypto/sm3-avx-asm_64.S +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -328,7 +328,6 @@ * void sm3_transform_avx(struct sm3_state *state, * const u8 *data, int nblocks); */ -.align 16 SYM_TYPED_FUNC_START(sm3_transform_avx) /* input: * %rdi: 
ctx, CTX diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 22b6560eb9e1..e2668d2fe6ce 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -140,13 +140,11 @@ .text -.align 16 /* * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, * const u8 *src, int nblocks) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_crypt4) /* input: * %rdi: round key array, CTX @@ -250,7 +248,6 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4) RET; SYM_FUNC_END(sm4_aesni_avx_crypt4) -.align 8 SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) /* input: * %rdi: round key array, CTX @@ -364,7 +361,6 @@ SYM_FUNC_END(__sm4_crypt_blk8) * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, * const u8 *src, int nblocks) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_crypt8) /* input: * %rdi: round key array, CTX @@ -420,7 +416,6 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8) * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) /* input: * %rdi: round key array, CTX @@ -495,7 +490,6 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) /* input: * %rdi: round key array, CTX @@ -545,7 +539,6 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) /* input: * %rdi: round key array, CTX diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index 23ee39a8ada8..98ede9459287 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -154,9 +154,6 @@ .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef .text -.align 16 - -.align 8 SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) /* input: * %rdi: round key array, CTX @@ -282,7 +279,6 @@ 
SYM_FUNC_END(__sm4_crypt_blk16) * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) /* input: * %rdi: round key array, CTX @@ -395,7 +391,6 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) /* input: * %rdi: round key array, CTX @@ -449,7 +444,6 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) /* input: * %rdi: round key array, CTX diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 31f9b2ec3857..12fde271cd3f 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -228,7 +228,6 @@ vpxor x2, wkey, x2; \ vpxor x3, wkey, x3; -.align 8 SYM_FUNC_START_LOCAL(__twofish_enc_blk8) /* input: * %rdi: ctx, CTX @@ -270,7 +269,6 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8) RET; SYM_FUNC_END(__twofish_enc_blk8) -.align 8 SYM_FUNC_START_LOCAL(__twofish_dec_blk8) /* input: * %rdi: ctx, CTX |