diff options
Diffstat (limited to 'arch/x86')
91 files changed, 1390 insertions, 1483 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a3cc347143f..326b2d5bab9d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,7 +22,7 @@ config X86_64 def_bool y depends on 64BIT # Options that are inherently 64-bit kernel only: - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY @@ -44,11 +44,9 @@ config X86 # select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ANON_INODES select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_DATA select ARCH_CLOCKSOURCE_INIT - select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -307,9 +305,6 @@ config ZONE_DMA32 config AUDIT_ARCH def_bool y if X86_64 -config ARCH_SUPPORTS_OPTIMIZED_INLINING - def_bool y - config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 15d0fbe27872..f730680dc818 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -266,20 +266,6 @@ config CPA_DEBUG ---help--- Do change_page_attr() self-tests every 30 seconds. -config OPTIMIZE_INLINING - bool "Allow gcc to uninline functions marked 'inline'" - ---help--- - This option determines if the kernel forces gcc to inline the functions - developers have marked 'inline'. Doing so takes away freedom from gcc to - do what it thinks is best, which is desirable for the gcc 3.x series of - compilers. The gcc 4.x series have a rewritten inlining algorithm and - enabling this option will generate a smaller kernel there. Hopefully - this algorithm is so good that allowing gcc 4.x and above to make the - decision will become the default in the future. Until then this option - is there to test gcc for this. - - If unsure, say N. - config DEBUG_ENTRY bool "Debug low-level entry code" depends on DEBUG_KERNEL diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 3ea71b871813..bdeee1b830be 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. */ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/module.h> @@ -242,131 +242,35 @@ static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis128_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis128_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis128_aesni_alg[] = { - { - .setkey = crypto_aegis128_aesni_setkey, - .setauthsize = crypto_aegis128_aesni_setauthsize, - .encrypt = crypto_aegis128_aesni_encrypt, - .decrypt = crypto_aegis128_aesni_decrypt, - .init = crypto_aegis128_aesni_init_tfm, - .exit = crypto_aegis128_aesni_exit_tfm, - - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, - .chunksize = AEGIS128_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis128", - .cra_driver_name = "__aegis128-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis128_aesni_setkey, - .setauthsize = cryptd_aegis128_aesni_setauthsize, - .encrypt = cryptd_aegis128_aesni_encrypt, - .decrypt = cryptd_aegis128_aesni_decrypt, - .init = cryptd_aegis128_aesni_init_tfm, - .exit = cryptd_aegis128_aesni_exit_tfm, - - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, - .chunksize = AEGIS128_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - .cra_priority = 400, - - .cra_name = "aegis128", - .cra_driver_name = "aegis128-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis128_aesni_alg = { + .setkey = crypto_aegis128_aesni_setkey, + .setauthsize = crypto_aegis128_aesni_setauthsize, + .encrypt = crypto_aegis128_aesni_encrypt, + .decrypt = crypto_aegis128_aesni_decrypt, + .init = crypto_aegis128_aesni_init_tfm, + .exit = crypto_aegis128_aesni_exit_tfm, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS128_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis128", + .cra_driver_name = "__aegis128-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis128_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis128_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis128_aesni_alg, - ARRAY_SIZE(crypto_aegis128_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis128_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis128_aesni_alg, - ARRAY_SIZE(crypto_aegis128_aesni_alg)); + simd_unregister_aeads(&crypto_aegis128_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis128_aesni_module_init); diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c index 1b1b39c66c5e..80d917f7e467 100644 --- a/arch/x86/crypto/aegis128l-aesni-glue.c +++ b/arch/x86/crypto/aegis128l-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. */ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/module.h> @@ -242,131 +242,35 @@ static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis128l_aesni_alg[] = { - { - .setkey = crypto_aegis128l_aesni_setkey, - .setauthsize = crypto_aegis128l_aesni_setauthsize, - .encrypt = crypto_aegis128l_aesni_encrypt, - .decrypt = crypto_aegis128l_aesni_decrypt, - .init = crypto_aegis128l_aesni_init_tfm, - .exit = crypto_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis128l", - .cra_driver_name = "__aegis128l-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis128l_aesni_setkey, - .setauthsize = cryptd_aegis128l_aesni_setauthsize, - .encrypt = cryptd_aegis128l_aesni_encrypt, - .decrypt = cryptd_aegis128l_aesni_decrypt, - .init = cryptd_aegis128l_aesni_init_tfm, - .exit = cryptd_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - .cra_priority = 400, - - .cra_name = "aegis128l", - .cra_driver_name = "aegis128l-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis128l_aesni_alg = { + .setkey = crypto_aegis128l_aesni_setkey, + .setauthsize = crypto_aegis128l_aesni_setauthsize, + .encrypt = crypto_aegis128l_aesni_encrypt, + .decrypt = crypto_aegis128l_aesni_decrypt, + .init = crypto_aegis128l_aesni_init_tfm, + .exit = crypto_aegis128l_aesni_exit_tfm, + + .ivsize = AEGIS128L_NONCE_SIZE, + .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, + .chunksize = AEGIS128L_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis128l", + .cra_driver_name = "__aegis128l-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis128l_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis128l_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis128l_aesni_alg, - ARRAY_SIZE(crypto_aegis128l_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis128l_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis128l_aesni_alg, - ARRAY_SIZE(crypto_aegis128l_aesni_alg)); + simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis128l_aesni_module_init); diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c index 6227ca3220a0..716eecb66bd5 100644 --- a/arch/x86/crypto/aegis256-aesni-glue.c +++ b/arch/x86/crypto/aegis256-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. */ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/module.h> @@ -242,131 +242,35 @@ static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis256_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis256_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis256_aesni_alg[] = { - { - .setkey = crypto_aegis256_aesni_setkey, - .setauthsize = crypto_aegis256_aesni_setauthsize, - .encrypt = crypto_aegis256_aesni_encrypt, - .decrypt = crypto_aegis256_aesni_decrypt, - .init = crypto_aegis256_aesni_init_tfm, - .exit = crypto_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis256", - .cra_driver_name = "__aegis256-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis256_aesni_setkey, - .setauthsize = cryptd_aegis256_aesni_setauthsize, - .encrypt = cryptd_aegis256_aesni_encrypt, - .decrypt = cryptd_aegis256_aesni_decrypt, - .init = cryptd_aegis256_aesni_init_tfm, - .exit = cryptd_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - .cra_priority = 400, - - .cra_name = "aegis256", - .cra_driver_name = "aegis256-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis256_aesni_alg = { + .setkey = crypto_aegis256_aesni_setkey, + .setauthsize = crypto_aegis256_aesni_setauthsize, + .encrypt = crypto_aegis256_aesni_encrypt, + .decrypt = crypto_aegis256_aesni_decrypt, + .init = crypto_aegis256_aesni_init_tfm, + .exit = crypto_aegis256_aesni_exit_tfm, + + .ivsize = AEGIS256_NONCE_SIZE, + .maxauthsize = AEGIS256_MAX_AUTH_SIZE, + .chunksize = AEGIS256_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis256", + .cra_driver_name = "__aegis256-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis256_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis256_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis256_aesni_alg, - ARRAY_SIZE(crypto_aegis256_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis256_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis256_aesni_alg, - ARRAY_SIZE(crypto_aegis256_aesni_alg)); + simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis256_aesni_module_init); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 1e3d2102033a..21c246799aa5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -25,14 +25,13 @@ #include <linux/err.h> #include <crypto/algapi.h> #include <crypto/aes.h> -#include <crypto/cryptd.h> #include <crypto/ctr.h> #include <crypto/b128ops.h> #include <crypto/gcm.h> #include <crypto/xts.h> #include <asm/cpu_device_id.h> -#include <asm/fpu/api.h> #include <asm/crypto/aes.h> +#include <asm/simd.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> #include <crypto/internal/simd.h> @@ -333,7 +332,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, return -EINVAL; } - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) err = crypto_aes_expand_key(ctx, in_key, key_len); else { kernel_fpu_begin(); @@ -354,7 +353,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) crypto_aes_encrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -367,7 +366,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) crypto_aes_decrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -643,29 +642,6 @@ static int xts_decrypt(struct skcipher_request *req) aes_ctx(ctx->raw_crypt_ctx)); } -static int rfc4106_init(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void rfc4106_exit(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { @@ -710,15 +686,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); } -static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key, - unsigned int key_len) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(parent); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, key_len); -} - +/* This is the Integrity Check Value (aka the authentication tag) length and can + * be 8, 12 or 16 bytes long. */ static int common_rfc4106_set_authsize(struct crypto_aead *aead, unsigned int authsize) { @@ -734,17 +703,6 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead, return 0; } -/* This is the Integrity Check Value (aka the authentication tag length and can - * be 8, 12 or 16 bytes long. */ -static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(parent); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { @@ -964,38 +922,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req) return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, aes_ctx); } - -static int gcmaes_wrapper_encrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(tfm); - struct cryptd_aead *cryptd_tfm = *ctx; - - tfm = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - tfm = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, tfm); - - return crypto_aead_encrypt(req); -} - -static int gcmaes_wrapper_decrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(tfm); - struct cryptd_aead *cryptd_tfm = *ctx; - - tfm = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - tfm = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, tfm); - - return crypto_aead_decrypt(req); -} #endif static struct crypto_alg aesni_algs[] = { { @@ -1148,31 +1074,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req) aes_ctx); } -static int generic_gcmaes_init(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - - return 0; -} - -static void generic_gcmaes_exit(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg aesni_aead_algs[] = { { +static struct aead_alg aesni_aeads[] = { { .setkey = common_rfc4106_set_key, .setauthsize = common_rfc4106_set_authsize, .encrypt = helper_rfc4106_encrypt, @@ -1180,8 +1082,9 @@ static struct aead_alg aesni_aead_algs[] = { { .ivsize = GCM_RFC4106_IV_SIZE, .maxauthsize = 16, .base = { - .cra_name = "__gcm-aes-aesni", - .cra_driver_name = "__driver-gcm-aes-aesni", + .cra_name = "__rfc4106(gcm(aes))", + .cra_driver_name = "__rfc4106-gcm-aesni", + .cra_priority = 400, .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx), @@ -1189,24 +1092,6 @@ static struct aead_alg aesni_aead_algs[] = { { .cra_module = THIS_MODULE, }, }, { - .init = rfc4106_init, - .exit = rfc4106_exit, - .setkey = gcmaes_wrapper_set_key, - .setauthsize = gcmaes_wrapper_set_authsize, - .encrypt = gcmaes_wrapper_encrypt, - .decrypt = gcmaes_wrapper_decrypt, - .ivsize = GCM_RFC4106_IV_SIZE, - .maxauthsize = 16, - .base = { - .cra_name = "rfc4106(gcm(aes))", - .cra_driver_name = "rfc4106-gcm-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_module = THIS_MODULE, - }, -}, { .setkey = generic_gcmaes_set_key, .setauthsize = generic_gcmaes_set_authsize, .encrypt = generic_gcmaes_encrypt, @@ -1214,38 +1099,21 @@ static struct aead_alg aesni_aead_algs[] = { { .ivsize = GCM_AES_IV_SIZE, .maxauthsize = 16, .base = { - .cra_name = "__generic-gcm-aes-aesni", - .cra_driver_name = "__driver-generic-gcm-aes-aesni", - .cra_priority = 0, + .cra_name = "__gcm(aes)", + .cra_driver_name = "__generic-gcm-aesni", + .cra_priority = 400, .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), .cra_alignmask = AESNI_ALIGN - 1, .cra_module = THIS_MODULE, }, -}, { - .init = generic_gcmaes_init, - .exit = generic_gcmaes_exit, - .setkey = gcmaes_wrapper_set_key, - .setauthsize = gcmaes_wrapper_set_authsize, - .encrypt = gcmaes_wrapper_encrypt, - .decrypt = gcmaes_wrapper_decrypt, - .ivsize = GCM_AES_IV_SIZE, - .maxauthsize = 16, - .base = { - .cra_name = "gcm(aes)", - .cra_driver_name = "generic-gcm-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_module = THIS_MODULE, - }, } }; #else -static struct aead_alg aesni_aead_algs[0]; +static struct aead_alg aesni_aeads[0]; #endif +static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)]; static const struct x86_cpu_id aesni_cpu_id[] = { X86_FEATURE_MATCH(X86_FEATURE_AES), @@ -1253,23 +1121,9 @@ static const struct x86_cpu_id aesni_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); -static void aesni_free_simds(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) && - aesni_simd_skciphers[i]; i++) - simd_skcipher_free(aesni_simd_skciphers[i]); -} - static int __init aesni_init(void) { - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; int err; - int i; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1304,36 +1158,22 @@ static int __init aesni_init(void) if (err) return err; - err = crypto_register_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + err = simd_register_skciphers_compat(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); if (err) goto unregister_algs; - err = crypto_register_aeads(aesni_aead_algs, - ARRAY_SIZE(aesni_aead_algs)); + err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); if (err) goto unregister_skciphers; - for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) { - algname = aesni_skciphers[i].base.cra_name + 2; - drvname = aesni_skciphers[i].base.cra_driver_name + 2; - basename = aesni_skciphers[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aesni_simd_skciphers[i] = simd; - } - return 0; -unregister_simds: - aesni_free_simds(); - crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); unregister_skciphers: - crypto_unregister_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); unregister_algs: crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); return err; @@ -1341,10 +1181,10 @@ unregister_algs: static void __exit aesni_exit(void) { - aesni_free_simds(); - crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); - crypto_unregister_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); } diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 45c1c4143176..4967ad620775 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -12,10 +12,10 @@ #include <crypto/algapi.h> #include <crypto/chacha.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <linux/kernel.h> #include <linux/module.h> -#include <asm/fpu/api.h> #include <asm/simd.h> #define CHACHA_STATE_ALIGN 16 @@ -170,7 +170,7 @@ static int chacha_simd(struct skcipher_request *req) struct skcipher_walk walk; int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); @@ -193,7 +193,7 @@ static int xchacha_simd(struct skcipher_request *req) u8 real_iv[16]; int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_xchacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index c8d9cdacbf10..cb4ab6645106 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -32,10 +32,11 @@ #include <linux/kernel.h> #include <linux/crc32.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -54,7 +55,7 @@ static u32 __attribute__((pure)) unsigned int iremainder; unsigned int prealign; - if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) return crc32_le(crc, p, len); if ((long)p & SCALE_F_MASK) { diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 5773e1161072..a58fe217c856 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -29,10 +29,11 @@ #include <linux/string.h> #include <linux/kernel.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> -#include <asm/fpu/internal.h> +#include <asm/simd.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -177,7 +178,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, * use faster PCL version if datasize is large enough to * overcome kernel fpu state save/restore overhead */ - if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); *crcp = crc_pcl(data, len, *crcp); kernel_fpu_end(); @@ -189,7 +190,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { - if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); kernel_fpu_end(); diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 0e785c0b2354..3c81e15b0873 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -26,12 +26,13 @@ #include <linux/module.h> #include <linux/crc-t10dif.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/string.h> #include <linux/kernel.h> -#include <asm/fpu/api.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> +#include <asm/simd.h> asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); @@ -53,7 +54,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - if (length >= 16 && irq_fpu_usable()) { + if (length >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); kernel_fpu_end(); @@ -70,15 +71,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { - if (len >= 16 && irq_fpu_usable()) { + if (len >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -87,15 +87,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 3582ae885ee1..e3f3e6fd9d65 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,8 +19,9 @@ #include <crypto/cryptd.h> #include <crypto/gf128mul.h> #include <crypto/internal/hash.h> -#include <asm/fpu/api.h> +#include <crypto/internal/simd.h> #include <asm/cpu_device_id.h> +#include <asm/simd.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 @@ -171,7 +172,6 @@ static int ghash_async_init(struct ahash_request *req) struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); desc->tfm = child; - desc->flags = req->base.flags; return crypto_shash_init(desc); } @@ -182,7 +182,7 @@ static int ghash_async_update(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -200,7 +200,7 @@ static int ghash_async_final(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -241,7 +241,7 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -251,7 +251,6 @@ static int ghash_async_digest(struct ahash_request *req) struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); desc->tfm = child; - desc->flags = req->base.flags; return shash_ahash_digest(req, desc); } } diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c index 6634907d6ccd..679627a2a824 100644 --- a/arch/x86/crypto/morus1280-avx2-glue.c +++ b/arch/x86/crypto/morus1280-avx2-glue.c @@ -12,6 +12,7 @@ */ #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/morus1280_glue.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400); +MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus1280_avx2_module_init(void) { @@ -44,14 +47,13 @@ static int __init crypto_morus1280_avx2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus1280_avx2_algs, - ARRAY_SIZE(crypto_morus1280_avx2_algs)); + return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1, + &simd_alg); } static void __exit crypto_morus1280_avx2_module_exit(void) { - crypto_unregister_aeads(crypto_morus1280_avx2_algs, - ARRAY_SIZE(crypto_morus1280_avx2_algs)); + simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg); } module_init(crypto_morus1280_avx2_module_init); diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c index f40244eaf14d..c35c0638d0bb 100644 --- a/arch/x86/crypto/morus1280-sse2-glue.c +++ b/arch/x86/crypto/morus1280-sse2-glue.c @@ -12,6 +12,7 @@ */ #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/morus1280_glue.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); +MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus1280_sse2_module_init(void) { @@ -43,14 +46,13 @@ static int __init crypto_morus1280_sse2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus1280_sse2_algs, - ARRAY_SIZE(crypto_morus1280_sse2_algs)); + return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1, + &simd_alg); } static void __exit crypto_morus1280_sse2_module_exit(void) { - crypto_unregister_aeads(crypto_morus1280_sse2_algs, - ARRAY_SIZE(crypto_morus1280_sse2_algs)); + simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg); } module_init(crypto_morus1280_sse2_module_init); diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c index 7e600f8bcdad..30fc1bd98ec3 100644 --- a/arch/x86/crypto/morus1280_glue.c +++ b/arch/x86/crypto/morus1280_glue.c @@ -11,7 +11,6 @@ * any later version. */ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> #include <crypto/morus1280_glue.h> @@ -205,90 +204,6 @@ void crypto_morus1280_glue_init_ops(struct crypto_aead *aead, } EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops); -int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey); - -int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize); - -int cryptd_morus1280_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt); - -int cryptd_morus1280_glue_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt); - -int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - const char *name = crypto_aead_alg(aead)->base.cra_driver_name; - char internal_name[CRYPTO_MAX_ALG_NAME]; - - if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) - >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm); - -void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c index 9afaf8f8565a..32da56b3bdad 100644 --- a/arch/x86/crypto/morus640-sse2-glue.c +++ b/arch/x86/crypto/morus640-sse2-glue.c @@ -12,6 +12,7 @@ */ #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/morus640_glue.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -35,7 +36,9 @@ asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); +MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus640_sse2_module_init(void) { @@ -43,14 +46,13 @@ static int __init crypto_morus640_sse2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus640_sse2_algs, - ARRAY_SIZE(crypto_morus640_sse2_algs)); + return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1, + &simd_alg); } static void __exit crypto_morus640_sse2_module_exit(void) { - crypto_unregister_aeads(crypto_morus640_sse2_algs, - ARRAY_SIZE(crypto_morus640_sse2_algs)); + simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg); } module_init(crypto_morus640_sse2_module_init); diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c index cb3a81732016..1dea33d84426 100644 --- a/arch/x86/crypto/morus640_glue.c +++ b/arch/x86/crypto/morus640_glue.c @@ -11,7 +11,6 @@ * any later version. */ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> #include <crypto/morus640_glue.h> @@ -200,90 +199,6 @@ void crypto_morus640_glue_init_ops(struct crypto_aead *aead, } EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops); -int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey); - -int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize); - -int cryptd_morus640_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt); - -int cryptd_morus640_glue_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt); - -int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - const char *name = crypto_aead_alg(aead)->base.cra_driver_name; - char internal_name[CRYPTO_MAX_ALG_NAME]; - - if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) - >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm); - -void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index 20d815ea4b6a..f7567cbd35b6 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -7,9 +7,10 @@ */ #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/nhpoly1305.h> #include <linux/module.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len, u8 hash[NH_HASH_BYTES]); @@ -24,7 +25,7 @@ static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len, static int nhpoly1305_avx2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - if (srclen < 64 || !irq_fpu_usable()) + if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index ed68d164ce14..a661ede3b5cf 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -7,9 +7,10 @@ */ #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/nhpoly1305.h> #include <linux/module.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, u8 hash[NH_HASH_BYTES]); @@ -24,7 +25,7 @@ static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len, static int nhpoly1305_sse2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - if (srclen < 64 || !irq_fpu_usable()) + if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 88cc01506c84..6eb65b237b3c 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -11,11 +11,11 @@ #include <crypto/algapi.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/poly1305.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/module.h> -#include <asm/fpu/api.h> #include <asm/simd.h> struct poly1305_simd_desc_ctx { @@ -126,7 +126,7 @@ static int poly1305_simd_update(struct shash_desc *desc, unsigned int bytes; /* kernel_fpu_begin/end is costly, use fallback for small updates */ - if (srclen <= 288 || !may_use_simd()) + if (srclen <= 288 || !crypto_simd_usable()) return crypto_poly1305_update(desc, src, srclen); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 7391c7de72c7..42f177afc33a 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> @@ -29,7 +30,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> typedef void (sha1_transform_fn)(u32 *digest, const char *data, unsigned int rounds); @@ -39,7 +40,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, { struct sha1_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) return crypto_sha1_update(desc, data, len); @@ -57,7 +58,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, static int sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha1_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 773a873d2b28..73867da3cbee 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -30,6 +30,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> @@ -37,8 +38,8 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> -#include <asm/fpu/api.h> #include <linux/string.h> +#include <asm/simd.h> asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); @@ -49,7 +50,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, { struct sha256_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) return crypto_sha256_update(desc, data, len); @@ -67,7 +68,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, static int sha256_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha256_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index f1b811b60ba6..458356a3f124 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -28,16 +28,16 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/cryptohash.h> +#include <linux/string.h> #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha512_base.h> -#include <asm/fpu/api.h> - -#include <linux/string.h> +#include <asm/simd.h> asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, u64 rounds); @@ -49,7 +49,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, { struct sha512_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) return crypto_sha512_update(desc, data, len); @@ -67,7 +67,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, static int sha512_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha512_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7bc105f47d21..a986b3c8294c 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -25,12 +25,14 @@ #include <linux/uprobes.h> #include <linux/livepatch.h> #include <linux/syscalls.h> +#include <linux/uaccess.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/vdso.h> -#include <linux/uaccess.h> #include <asm/cpufeature.h> +#include <asm/fpu/api.h> +#include <asm/nospec-branch.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -196,6 +198,13 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) exit_to_usermode_loop(regs, cached_flags); + /* Reload ti->flags; we may have rescheduled above. */ + cached_flags = READ_ONCE(ti->flags); + + fpregs_assert_state_consistent(); + if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + #ifdef CONFIG_COMPAT /* * Compat syscalls set TS_COMPAT. Make sure we clear it before @@ -212,6 +221,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) #endif user_enter_irqoff(); + + mds_user_clear_cpu_buffers(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 1f9607ed087c..4cd5f982b1e5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -398,7 +398,12 @@ 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq -# don't use numbers 387 through 392, add new calls at the end +387 i386 open_tree sys_open_tree __ia32_sys_open_tree +388 i386 move_mount sys_move_mount __ia32_sys_move_mount +389 i386 fsopen sys_fsopen __ia32_sys_fsopen +390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig +391 i386 fsmount sys_fsmount __ia32_sys_fsmount +392 i386 fspick sys_fspick __ia32_sys_fspick 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..64ca0d06259a 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,12 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +335 common open_tree __x64_sys_open_tree +336 common move_mount __x64_sys_move_mount +337 common fsopen __x64_sys_fsopen +338 common fsconfig __x64_sys_fsconfig +339 common fsmount __x64_sys_fsmount +340 common fspick __x64_sys_fspick # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 7cdd7b13bbda..890a3fb5706f 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -23,7 +23,7 @@ #include <linux/device.h> #include <linux/coredump.h> -#include <asm-generic/sizes.h> +#include <linux/sizes.h> #include <asm/perf_event.h> #include "../perf_event.h" diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 4d5fcd47ab75..629d1ee05599 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -221,8 +221,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { - struct fpu *fpu = ¤t->thread.fpu; - unsigned long sp; + unsigned long sp, fx_aligned, math_size; /* Default to using normal stack */ sp = regs->sp; @@ -236,15 +235,11 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (fpu->initialized) { - unsigned long fx_aligned, math_size; - - sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); - *fpstate = (struct _fpstate_32 __user *) sp; - if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, - math_size) < 0) - return (void __user *) -1L; - } + sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); + *fpstate = (struct _fpstate_32 __user *) sp; + if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, + math_size) < 0) + return (void __user *) -1L; sp -= frame_size; /* Align the stack pointer according to the i386 ABI, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 981ff9479648..75f27ee2c263 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ @@ -382,5 +383,7 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ce4d176b3d13..6b15a24930e0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -13,14 +13,7 @@ #include <asm/swiotlb.h> #include <linux/dma-contiguous.h> -#ifdef CONFIG_ISA -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) -#else -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) -#endif - extern int iommu_merge; -extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; extern const struct dma_map_ops *dma_ops; @@ -30,7 +23,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -bool arch_dma_alloc_attrs(struct device **dev); -#define arch_dma_alloc_attrs arch_dma_alloc_attrs - #endif diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b56d504af654..b774c52e5411 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,6 +10,7 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H +#include <linux/bottom_half.h> /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It @@ -21,6 +22,36 @@ extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); +extern void fpregs_mark_activate(void); + +/* + * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. + * A context switch will (and softirq might) save CPU's FPU registers to + * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in + * a random state. + */ +static inline void fpregs_lock(void) +{ + preempt_disable(); + local_bh_disable(); +} + +static inline void fpregs_unlock(void) +{ + local_bh_enable(); + preempt_enable(); +} + +#ifdef CONFIG_X86_DEBUG_FPU +extern void fpregs_assert_state_consistent(void); +#else +static inline void fpregs_assert_state_consistent(void) { } +#endif + +/* + * Load the task FPU state before returning to userspace. + */ +extern void switch_fpu_return(void); /* * Query the presence of one or more xfeatures. Works on any legacy CPU as well. diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 745a19d34f23..9e27fa05a7ae 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -14,6 +14,7 @@ #include <linux/compat.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/mm.h> #include <asm/user.h> #include <asm/fpu/api.h> @@ -24,14 +25,12 @@ /* * High level FPU state handling functions: */ -extern void fpu__initialize(struct fpu *fpu); extern void fpu__prepare_read(struct fpu *fpu); extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); -extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); +extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern void fpu__clear(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); @@ -122,6 +121,21 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) +#define kernel_insn_err(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + #define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ @@ -150,6 +164,14 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } +static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) @@ -163,6 +185,11 @@ static inline void copy_kernel_to_fregs(struct fregs_state *fx) kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } +static inline int copy_kernel_to_fregs_err(struct fregs_state *fx) +{ + return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + static inline int copy_user_to_fregs(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -363,6 +390,21 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) } /* + * Restore xstate from kernel space xsave area, return an error code instead of + * an exception. + */ +static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + + return err; +} + +/* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact and we can * keep registers active. @@ -487,6 +529,25 @@ static inline void fpregs_activate(struct fpu *fpu) } /* + * Internal helper, do not use directly. Use switch_fpu_return() instead. + */ +static inline void __fpregs_load_activate(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + int cpu = smp_processor_id(); + + if (WARN_ON_ONCE(current->mm == NULL)) + return; + + if (!fpregs_state_valid(fpu, cpu)) { + copy_kernel_to_fpregs(&fpu->state); + fpregs_activate(fpu); + fpu->last_cpu = cpu; + } + clear_thread_flag(TIF_NEED_FPU_LOAD); +} + +/* * FPU state switching for scheduling. * * This is a two-stage process: @@ -494,12 +555,23 @@ static inline void fpregs_activate(struct fpu *fpu) * - switch_fpu_prepare() saves the old state. * This is done within the context of the old process. * - * - switch_fpu_finish() restores the new state as - * necessary. + * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state + * will get loaded on return to userspace, or when the kernel needs it. + * + * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers + * are saved in the current thread's FPU register state. + * + * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not + * hold current()'s FPU registers. It is required to load the + * registers before returning to userland or using the content + * otherwise. + * + * The FPU context is only stored/restored for a user task and + * ->mm is used to distinguish between kernel and user threads. */ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && current->mm) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -507,8 +579,7 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) /* But leave fpu_fpregs_owner_ctx! */ trace_x86_fpu_regs_deactivated(old_fpu); - } else - old_fpu->last_cpu = -1; + } } /* @@ -516,36 +587,32 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ /* - * Set up the userspace FPU context for the new task, if the task - * has used the FPU. + * Load PKRU from the FPU context if available. Delay loading of the + * complete FPU state until the return to userland. */ -static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) +static inline void switch_fpu_finish(struct fpu *new_fpu) { - bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->initialized; + u32 pkru_val = init_pkru_value; + struct pkru_state *pk; - if (preload) { - if (!fpregs_state_valid(new_fpu, cpu)) - copy_kernel_to_fpregs(&new_fpu->state); - fpregs_activate(new_fpu); - } -} + if (!static_cpu_has(X86_FEATURE_FPU)) + return; -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). - */ -static inline void user_fpu_begin(void) -{ - struct fpu *fpu = ¤t->thread.fpu; + set_thread_flag(TIF_NEED_FPU_LOAD); + + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; - preempt_disable(); - fpregs_activate(fpu); - preempt_enable(); + /* + * PKRU state is switched eagerly because it needs to be valid before we + * return to userland e.g. for a copy_to_user() operation. + */ + if (current->mm) { + pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); + if (pk) + pkru_val = pk->pkru; + } + __write_pkru(pkru_val); } /* diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 44bbc39a57b3..7fb516b6893a 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -22,7 +22,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, +extern void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env); unsigned long diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 2e32e178e064..f098f6cab94b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -294,15 +294,6 @@ struct fpu { unsigned int last_cpu; /* - * @initialized: - * - * This flag indicates whether this context is initialized: if the task - * is not running then we can restore from this context, if the task - * is running then we should save into this context. - */ - unsigned char initialized; - - /* * @avx512_timestamp: * * Records the timestamp of AVX512 use during last context switch. diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 48581988d78c..7e42b285c856 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -2,9 +2,11 @@ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H +#include <linux/uaccess.h> #include <linux/types.h> + #include <asm/processor.h> -#include <linux/uaccess.h> +#include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) @@ -46,8 +48,8 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); -void *get_xsave_addr(struct xregs_state *xsave, int xstate); -const void *get_xsave_field_ptr(int xstate_field); +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); +const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 7469d321f072..f65cfb48cfdd 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -17,8 +17,4 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif - #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 2bdbbbcfa393..cdf44aa9a501 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This file contains definitions from Hyper-V Hypervisor Top-Level Functional diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 058e40fed167..8a0e56e1dcc9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ +#include <asm/nospec-branch.h> + /* Provide __cpuidle; we can't safely include <linux/cpu.h> */ #define __cpuidle __attribute__((__section__(".cpuidle.text"))) @@ -54,11 +56,13 @@ static inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 22d05e3835f0..dc2d4b206ab7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -210,16 +210,6 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -#ifdef CONFIG_X86_MCE_AMD -void mce_amd_feature_init(struct cpuinfo_x86 *c); -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -#else -static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } -static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; -#endif - -static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); @@ -345,12 +335,19 @@ extern bool amd_mce_is_memory_error(struct mce *m); extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); -#else +void mce_amd_feature_init(struct cpuinfo_x86 *c); +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; -static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; -static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +#else +static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; +static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +static inline int +umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; #endif +static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1378518cf63f..88dd202c8b00 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H +#include <linux/bits.h> + /* * CPU model specific register (MSR) numbers. * @@ -40,14 +42,14 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -69,20 +71,25 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 39a2fb29378a..eb0f80ce8524 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -6,6 +6,7 @@ #include <linux/sched/idle.h> #include <asm/cpufeature.h> +#include <asm/nospec-branch.h> #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { + /* No MDS buffer clear as this is AMD/HYGON only */ + /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" :: "a" (eax), "b" (ebx), "c" (ecx)); @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index daf25b60c9e3..109f974f9835 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -308,6 +308,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + +#include <asm/segment.h> + +/** + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3a221942f805..5e0509b41986 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -23,6 +23,8 @@ #ifndef __ASSEMBLY__ #include <asm/x86_init.h> +#include <asm/fpu/xstate.h> +#include <asm/fpu/api.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -127,14 +129,29 @@ static inline int pte_dirty(pte_t pte) static inline u32 read_pkru(void) { if (boot_cpu_has(X86_FEATURE_OSPKE)) - return __read_pkru(); + return rdpkru(); return 0; } static inline void write_pkru(u32 pkru) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) - __write_pkru(pkru); + struct pkru_state *pk; + + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return; + + pk = get_xsave_addr(¤t->thread.fpu.state.xsave, XFEATURE_PKRU); + + /* + * The PKRU value in xstate needs to be in sync with the value that is + * written to the CPU. The FPU restore on return to userland would + * otherwise load the previous value again. + */ + fpregs_lock(); + if (pk) + pk->pkru = pkru; + __write_pkru(pkru); + fpregs_unlock(); } static inline int pte_young(pte_t pte) @@ -1358,6 +1375,12 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #define PKRU_WD_BIT 0x2 #define PKRU_BITS_PER_PKEY 2 +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#else +#define init_pkru_value 0 +#endif + static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7e99ef67bff0..c34a35c78618 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -978,4 +978,10 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 43c029cdc3fe..0a3c4cab39db 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -92,7 +92,7 @@ static inline void native_write_cr8(unsigned long val) #endif #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { u32 ecx = 0; u32 edx, pkru; @@ -107,7 +107,7 @@ static inline u32 __read_pkru(void) return pkru; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { u32 ecx = 0, edx = 0; @@ -118,8 +118,21 @@ static inline void __write_pkru(u32 pkru) asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (pkru), "c"(ecx), "d"(edx)); } + +static inline void __write_pkru(u32 pkru) +{ + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. + */ + if (pkru == rdpkru()) + return; + + wrpkru(pkru); +} + #else -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { return 0; } diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 4c305471ec33..b05ad16174e5 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -105,7 +105,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } @@ -160,10 +160,12 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ - return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + return (IS_ENABLED(CONFIG_IA32_EMULATION) && + task->thread_info.status & TS_COMPAT) + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0eccbcb8447..f9453536f9bb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -88,6 +88,7 @@ struct thread_info { #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ +#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */ #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) +#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD) #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index e0e6d7f21399..6b1e87194809 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -30,7 +30,7 @@ DECLARE_EVENT_CLASS(x86_exceptions, __entry->error_code = error_code; ), - TP_printk("address=%pf ip=%pf error_code=0x%lx", + TP_printk("address=%ps ip=%ps error_code=0x%lx", (void *)__entry->address, (void *)__entry->ip, __entry->error_code) ); diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 069c04be1507..879b77792f94 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, initialized) + __field(bool, load_fpu) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->initialized = fpu->initialized; + __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->initialized, + __entry->load_fpu, __entry->xfeatures, __entry->xcomp_bv ) @@ -64,11 +64,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_init_state, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/include/uapi/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/x86/include/uapi/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8dcbf6890714..9fc92e4539d8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -197,7 +197,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } static int __init -acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic *processor = NULL; #ifdef CONFIG_X86_X2APIC @@ -210,7 +210,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); #ifdef CONFIG_X86_X2APIC apic_id = processor->local_apic_id; @@ -242,7 +242,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic *processor = NULL; @@ -251,7 +251,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Ignore invalid ID */ if (processor->id == 0xff) @@ -272,7 +272,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) } static int __init -acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_sapic *processor = NULL; @@ -281,7 +281,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ processor->processor_id, /* ACPI ID */ @@ -291,7 +291,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, +acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; @@ -301,7 +301,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_lapic_addr = lapic_addr_ovr->address; @@ -309,7 +309,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, +acpi_parse_x2apic_nmi(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL; @@ -319,7 +319,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, if (BAD_MADT_ENTRY(x2apic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (x2apic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ -328,7 +328,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, } static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; @@ -337,7 +337,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e if (BAD_MADT_ENTRY(lapic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (lapic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ -449,7 +449,7 @@ static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, } static int __init -acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_ioapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; struct ioapic_domain_cfg cfg = { @@ -462,7 +462,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(ioapic, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */ if (ioapic->global_irq_base < nr_legacy_irqs()) @@ -508,7 +508,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, } static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, +acpi_parse_int_src_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_override *intsrc = NULL; @@ -518,7 +518,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(intsrc, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { acpi_sci_ioapic_setup(intsrc->source_irq, @@ -550,7 +550,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_nmi_source *nmi_src = NULL; @@ -559,7 +559,7 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end if (BAD_MADT_ENTRY(nmi_src, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* TBD: Support nimsrc entries? */ diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 2c0aa34af69c..bf7f13ea3c64 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -233,9 +233,6 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page, unsigned long bus; phys_addr_t paddr = page_to_phys(page) + offset; - if (!dev) - dev = &x86_dma_fallback_dev; - if (!need_iommu(dev, paddr, size)) return paddr; @@ -392,9 +389,6 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (nents == 0) return 0; - if (!dev) - dev = &x86_dma_fallback_dev; - out = 0; start = 0; start_sg = sg; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 29630393f300..03b4cc0ec3a7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,6 +37,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); +static void __init mds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -63,6 +64,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +/* Control MDS CPU buffer clear before returning to user space */ +DEFINE_STATIC_KEY_FALSE(mds_user_clear); +EXPORT_SYMBOL_GPL(mds_user_clear); +/* Control MDS CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); +EXPORT_SYMBOL_GPL(mds_idle_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -101,6 +109,10 @@ void __init check_bugs(void) l1tf_select_mitigation(); + mds_select_mitigation(); + + arch_smt_update(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -207,6 +219,61 @@ static void x86_amd_ssb_disable(void) } #undef pr_fmt +#define pr_fmt(fmt) "MDS: " fmt + +/* Default mitigation for MDS-affected CPUs */ +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; +static bool mds_nosmt __ro_after_init = false; + +static const char * const mds_strings[] = { + [MDS_MITIGATION_OFF] = "Vulnerable", + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", +}; + +static void __init mds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { + mds_mitigation = MDS_MITIGATION_OFF; + return; + } + + if (mds_mitigation == MDS_MITIGATION_FULL) { + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + + static_branch_enable(&mds_user_clear); + + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + } + + pr_info("%s\n", mds_strings[mds_mitigation]); +} + +static int __init mds_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + mds_mitigation = MDS_MITIGATION_OFF; + else if (!strcmp(str, "full")) + mds_mitigation = MDS_MITIGATION_FULL; + else if (!strcmp(str, "full,nosmt")) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_nosmt = true; + } + + return 0; +} +early_param("mds", mds_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = @@ -575,9 +642,6 @@ specv2_set_mode: /* Set up IBPB and STIBP depending on the general spectre V2 command */ spectre_v2_user_select_mitigation(cmd); - - /* Enable STIBP if appropriate */ - arch_smt_update(); } static void update_stibp_msr(void * __unused) @@ -611,6 +675,31 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +#undef pr_fmt +#define pr_fmt(fmt) fmt + +/* Update the static key controlling the MDS CPU buffer clear in idle */ +static void update_mds_branch_idle(void) +{ + /* + * Enable the idle clearing if SMT is active on CPUs which are + * affected only by MSBDS and not any other MDS variant. + * + * The other variants cannot be mitigated when SMT is enabled, so + * clearing the buffers on idle just to prevent the Store Buffer + * repartitioning leak would be a window dressing exercise. + */ + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + return; + + if (sched_smt_active()) + static_branch_enable(&mds_idle_clear); + else + static_branch_disable(&mds_idle_clear); +} + +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -632,6 +721,17 @@ void arch_smt_update(void) break; } + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); + update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1043,7 +1143,7 @@ static void __init l1tf_select_mitigation(void) pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); pr_info("However, doing so will make a part of your RAM unusable.\n"); - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); return; } @@ -1076,6 +1176,7 @@ static int __init l1tf_cmdline(char *str) early_param("l1tf", l1tf_cmdline); #undef pr_fmt +#define pr_fmt(fmt) fmt #ifdef CONFIG_SYSFS @@ -1114,6 +1215,23 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static ssize_t mds_show_state(char *buf) +{ + if (!hypervisor_is_type(X86_HYPER_NATIVE)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); + } + + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); + } + + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1180,6 +1298,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) return l1tf_show_state(buf); break; + + case X86_BUG_MDS: + return mds_show_state(buf); + default: break; } @@ -1211,4 +1333,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b { return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); } + +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37640544e12f..d7f55ad2dfb1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -372,6 +372,8 @@ static bool pku_disabled; static __always_inline void setup_pku(struct cpuinfo_x86 *c) { + struct pkru_state *pk; + /* check the boot processor, plus compile options for PKU: */ if (!cpu_feature_enabled(X86_FEATURE_PKU)) return; @@ -382,6 +384,9 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) return; cr4_set_bits(X86_CR4_PKE); + pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); + if (pk) + pk->pkru = init_pkru_value; /* * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE * cpuid bit to be set. We need to ensure that we @@ -935,61 +940,77 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY }, - { X86_VENDOR_CENTAUR, 5 }, - { X86_VENDOR_INTEL, 5 }, - { X86_VENDOR_NSC, 5 }, - { X86_VENDOR_ANY, 4 }, - {} -}; +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) +#define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - { X86_VENDOR_AMD }, - { X86_VENDOR_HYGON }, - {} -}; +#define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } -/* Only list CPUs which speculate but are non susceptible to SSB */ -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - { X86_VENDOR_AMD, 0x12, }, - { X86_VENDOR_AMD, 0x11, }, - { X86_VENDOR_AMD, 0x10, }, - { X86_VENDOR_AMD, 0xf, }, - {} -}; +#define VULNWL_INTEL(model, whitelist) \ + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) + +#define VULNWL_AMD(family, whitelist) \ + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) + +#define VULNWL_HYGON(family, whitelist) \ + VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) + +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { - /* in addition to cpu_no_speculation */ - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), + + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), {} }; +static bool __init cpu_matches(unsigned long which) +{ + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); + + return m && !!(m->driver_data & which); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_speculation)) + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); @@ -998,15 +1019,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (x86_match_cpu(cpu_no_meltdown)) + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { + setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(MSBDS_ONLY)) + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); + } + + if (cpu_matches(NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1015,7 +1041,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (x86_match_cpu(cpu_no_l1tf)) + if (cpu_matches(NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index e64de5149e50..d904aafe6409 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -563,33 +563,59 @@ out: return offset; } +bool amd_filter_mce(struct mce *m) +{ + enum smca_bank_types bank_type = smca_get_bank_type(m->bank); + struct cpuinfo_x86 *c = &boot_cpu_data; + u8 xec = (m->status >> 16) & 0x3F; + + /* See Family 17h Models 10h-2Fh Erratum #1114. */ + if (c->x86 == 0x17 && + c->x86_model >= 0x10 && c->x86_model <= 0x2F && + bank_type == SMCA_IF && xec == 10) + return true; + + return false; +} + /* - * Turn off MC4_MISC thresholding banks on all family 0x15 models since - * they're not supported there. + * Turn off thresholding banks for the following conditions: + * - MC4_MISC thresholding is not supported on Family 0x15. + * - Prevent possible spurious interrupts from the IF bank on Family 0x17 + * Models 0x10-0x2F due to Erratum #1114. */ -void disable_err_thresholding(struct cpuinfo_x86 *c) +void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) { - int i; + int i, num_msrs; u64 hwcr; bool need_toggle; - u32 msrs[] = { - 0x00000413, /* MC4_MISC0 */ - 0xc0000408, /* MC4_MISC1 */ - }; + u32 msrs[NR_BLOCKS]; + + if (c->x86 == 0x15 && bank == 4) { + msrs[0] = 0x00000413; /* MC4_MISC0 */ + msrs[1] = 0xc0000408; /* MC4_MISC1 */ + num_msrs = 2; + } else if (c->x86 == 0x17 && + (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) { - if (c->x86 != 0x15) + if (smca_get_bank_type(bank) != SMCA_IF) + return; + + msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank); + num_msrs = 1; + } else { return; + } rdmsrl(MSR_K7_HWCR, hwcr); /* McStatusWrEn has to be set */ need_toggle = !(hwcr & BIT(18)); - if (need_toggle) wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); /* Clear CntP bit safely */ - for (i = 0; i < ARRAY_SIZE(msrs); i++) + for (i = 0; i < num_msrs; i++) msr_clear_bit(msrs[i], 62); /* restore old settings */ @@ -604,12 +630,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; - disable_err_thresholding(c); - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); + disable_err_thresholding(c, bank); + for (block = 0; block < NR_BLOCKS; ++block) { address = get_block_address(address, low, high, bank, block); if (!address) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b7fb541a4873..5112a50e6486 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -460,23 +460,6 @@ static void mce_irq_work_cb(struct irq_work *entry) mce_schedule_work(); } -static void mce_report_event(struct pt_regs *regs) -{ - if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { - mce_notify_irq(); - /* - * Triggering the work queue here is just an insurance - * policy in case the syscall exit notify handler - * doesn't run soon enough or ends up running on the - * wrong CPU (can happen when audit sleeps) - */ - mce_schedule_work(); - return; - } - - irq_work_queue(&mce_irq_work); -} - /* * Check if the address reported by the CPU is in a format we can parse. * It would be possible to add code for most other cases, but all would @@ -712,19 +695,49 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) barrier(); m.status = mce_rdmsrl(msr_ops.status(i)); + + /* If this entry is not valid, ignore it */ if (!(m.status & MCI_STATUS_VAL)) continue; /* - * Uncorrected or signalled events are handled by the exception - * handler when it is enabled, so don't process those here. - * - * TBD do the same check for MCI_STATUS_EN here? + * If we are logging everything (at CPU online) or this + * is a corrected error, then we must log it. */ - if (!(flags & MCP_UC) && - (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) - continue; + if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC)) + goto log_it; + + /* + * Newer Intel systems that support software error + * recovery need to make additional checks. Other + * CPUs should skip over uncorrected errors, but log + * everything else. + */ + if (!mca_cfg.ser) { + if (m.status & MCI_STATUS_UC) + continue; + goto log_it; + } + /* Log "not enabled" (speculative) errors */ + if (!(m.status & MCI_STATUS_EN)) + goto log_it; + + /* + * Log UCNA (SDM: 15.6.3 "UCR Error Classification") + * UC == 1 && PCC == 0 && S == 0 + */ + if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S)) + goto log_it; + + /* + * Skip anything else. Presumption is that our read of this + * bank is racing with a machine check. Leave the log alone + * for do_machine_check() to deal with it. + */ + continue; + +log_it: error_seen = true; mce_read_aux(&m, i); @@ -1301,7 +1314,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst > 0) - mce_report_event(regs); + irq_work_queue(&mce_irq_work); + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); sync_core(); @@ -1451,13 +1465,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { int i; - u8 num_banks = mca_cfg.banks; - mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < num_banks; i++) { + for (i = 0; i < MAX_NR_BANKS; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1471,28 +1484,19 @@ static int __mcheck_cpu_mce_banks_init(void) */ static int __mcheck_cpu_cap_init(void) { - unsigned b; u64 cap; + u8 b; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!mca_cfg.banks) - pr_info("CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - pr_warn("Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); + if (WARN_ON_ONCE(b > MAX_NR_BANKS)) b = MAX_NR_BANKS; - } - /* Don't support asymmetric configurations today */ - WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); - mca_cfg.banks = b; + mca_cfg.banks = max(mca_cfg.banks, b); if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); - if (err) return err; } @@ -1771,6 +1775,14 @@ static void __mcheck_cpu_init_timer(void) mce_start_timer(t); } +bool filter_mce(struct mce *m) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return amd_filter_mce(m); + + return false; +} + /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { @@ -2425,8 +2437,8 @@ static int fake_panic_set(void *data, u64 val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, - fake_panic_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, + "%llu\n"); static int __init mcheck_debugfs_init(void) { @@ -2435,8 +2447,8 @@ static int __init mcheck_debugfs_init(void) dmce = mce_get_debugfs_dir(); if (!dmce) return -ENOMEM; - ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, - &fake_panic_fops); + ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce, + NULL, &fake_panic_fops); if (!ffake_panic) return -ENOMEM; @@ -2451,6 +2463,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { + pr_info("Using %d MCE banks\n", mca_cfg.banks); + if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index 3395549c51d3..64d1d5a00f39 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -99,6 +99,9 @@ int mce_gen_pool_add(struct mce *mce) { struct mce_evt_llist *node; + if (filter_mce(mce)) + return -EINVAL; + if (!mce_evt_pool) return -EINVAL; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 3da9a8823e47..a6026170af92 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -570,9 +568,15 @@ err: static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can handle non-uniform values. */ + rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); + n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } @@ -665,10 +669,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index af5eab1e65e2..a34b55baa7aa 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -173,4 +173,13 @@ struct mca_msr_regs { extern struct mca_msr_regs msr_ops; +/* Decide whether to add MCE record to MCE event pool or filter it out. */ +extern bool filter_mce(struct mce *m); + +#ifdef CONFIG_X86_MCE_AMD +extern bool amd_filter_mce(struct mce *m); +#else +static inline bool amd_filter_mce(struct mce *m) { return false; }; +#endif + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 8a4a7823451a..c321f4f513f9 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -428,7 +428,7 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *inode, struct file *file) { - return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; + return capable(CAP_SYS_RAWIO) ? stream_open(inode, file) : -EPERM; } static ssize_t microcode_write(struct file *file, const char __user *buf, diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 50d5848bf22e..6c4f01540833 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -525,7 +525,8 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_I945G_IDS(&gen3_early_ops), INTEL_I945GM_IDS(&gen3_early_ops), INTEL_VLV_IDS(&gen6_early_ops), - INTEL_PINEVIEW_IDS(&gen3_early_ops), + INTEL_PINEVIEW_G_IDS(&gen3_early_ops), + INTEL_PINEVIEW_M_IDS(&gen3_early_ops), INTEL_I965G_IDS(&gen3_early_ops), INTEL_G33_IDS(&gen3_early_ops), INTEL_I965GM_IDS(&gen3_early_ops), @@ -547,6 +548,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_GLK_IDS(&gen9_early_ops), INTEL_CNL_IDS(&gen9_early_ops), INTEL_ICL_11_IDS(&gen11_early_ops), + INTEL_EHL_IDS(&gen11_early_ops), }; struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 2e5003fef51a..ce243f76bdb7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -101,24 +101,21 @@ static void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->initialized) { - /* - * Ignore return value -- we don't care if reg state - * is clobbered. - */ - copy_fpregs_to_fpstate(fpu); - } else { - __cpu_invalidate_fpregs_state(); + if (current->mm) { + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { + set_thread_flag(TIF_NEED_FPU_LOAD); + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ + copy_fpregs_to_fpstate(fpu); + } } + __cpu_invalidate_fpregs_state(); } static void __kernel_fpu_end(void) { - struct fpu *fpu = ¤t->thread.fpu; - - if (fpu->initialized) - copy_kernel_to_fpregs(&fpu->state); - kernel_fpu_enable(); } @@ -145,15 +142,17 @@ void fpu__save(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - preempt_disable(); + fpregs_lock(); trace_x86_fpu_before_save(fpu); - if (fpu->initialized) { + + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } } + trace_x86_fpu_after_save(fpu); - preempt_enable(); + fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -186,11 +185,14 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) +int fpu__copy(struct task_struct *dst, struct task_struct *src) { + struct fpu *dst_fpu = &dst->thread.fpu; + struct fpu *src_fpu = &src->thread.fpu; + dst_fpu->last_cpu = -1; - if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) + if (!static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -202,16 +204,23 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* - * Save current FPU registers directly into the child - * FPU context, without any memory-to-memory copying. + * If the FPU registers are not current just memcpy() the state. + * Otherwise save current FPU registers directly into the child's FPU + * context, without any memory-to-memory copying. * * ( The function 'fails' in the FNSAVE case, which destroys - * register contents so we have to copy them back. ) + * register contents so we have to load them back. ) */ - if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size); - copy_kernel_to_fpregs(&src_fpu->state); - } + fpregs_lock(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); + + else if (!copy_fpregs_to_fpstate(dst_fpu)) + copy_kernel_to_fpregs(&dst_fpu->state); + + fpregs_unlock(); + + set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); @@ -223,20 +232,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Activate the current task's in-memory FPU context, * if it has not been used before: */ -void fpu__initialize(struct fpu *fpu) +static void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - if (!fpu->initialized) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for the current task: */ - fpu->initialized = 1; - } + set_thread_flag(TIF_NEED_FPU_LOAD); + fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); } -EXPORT_SYMBOL_GPL(fpu__initialize); /* * This function must be called before we read a task's fpstate. @@ -248,32 +251,20 @@ EXPORT_SYMBOL_GPL(fpu__initialize); * * - or it's called for stopped tasks (ptrace), in which case the * registers were already saved by the context-switch code when - * the task scheduled out - we only have to initialize the registers - * if they've never been initialized. + * the task scheduled out. * * If the task has used the FPU before then save it. */ void fpu__prepare_read(struct fpu *fpu) { - if (fpu == ¤t->thread.fpu) { + if (fpu == ¤t->thread.fpu) fpu__save(fpu); - } else { - if (!fpu->initialized) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for current and for stopped child tasks: */ - fpu->initialized = 1; - } - } } /* * This function must be called before we write a task's fpstate. * - * If the task has used the FPU before then invalidate any cached FPU registers. - * If the task has not used the FPU before then initialize its fpstate. + * Invalidate any cached FPU registers. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will @@ -290,44 +281,11 @@ void fpu__prepare_write(struct fpu *fpu) */ WARN_ON_FPU(fpu == ¤t->thread.fpu); - if (fpu->initialized) { - /* Invalidate any cached state: */ - __fpu_invalidate_fpregs_state(fpu); - } else { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for stopped child tasks: */ - fpu->initialized = 1; - } + /* Invalidate any cached state: */ + __fpu_invalidate_fpregs_state(fpu); } /* - * 'fpu__restore()' is called to copy FPU registers from - * the FPU fpstate to the live hw registers and to activate - * access to the hardware registers, so that FPU instructions - * can be used afterwards. - * - * Must be called with kernel preemption disabled (for example - * with local interrupts disabled, as it is in the case of - * do_device_not_available()). - */ -void fpu__restore(struct fpu *fpu) -{ - fpu__initialize(fpu); - - /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ - kernel_fpu_disable(); - trace_x86_fpu_before_restore(fpu); - fpregs_activate(fpu); - copy_kernel_to_fpregs(&fpu->state); - trace_x86_fpu_after_restore(fpu); - kernel_fpu_enable(); -} -EXPORT_SYMBOL_GPL(fpu__restore); - -/* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents * in the fpregs in the eager-FPU case. @@ -341,17 +299,13 @@ void fpu__drop(struct fpu *fpu) preempt_disable(); if (fpu == ¤t->thread.fpu) { - if (fpu->initialized) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - fpregs_deactivate(fpu); - } + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); } - fpu->initialized = 0; - trace_x86_fpu_dropped(fpu); preempt_enable(); @@ -363,6 +317,8 @@ void fpu__drop(struct fpu *fpu) */ static inline void copy_init_fpstate_to_fpregs(void) { + fpregs_lock(); + if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, -1); else if (static_cpu_has(X86_FEATURE_FXSR)) @@ -372,6 +328,9 @@ static inline void copy_init_fpstate_to_fpregs(void) if (boot_cpu_has(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); + + fpregs_mark_activate(); + fpregs_unlock(); } /* @@ -389,16 +348,52 @@ void fpu__clear(struct fpu *fpu) /* * Make sure fpstate is cleared and initialized. */ - if (static_cpu_has(X86_FEATURE_FPU)) { - preempt_disable(); - fpu__initialize(fpu); - user_fpu_begin(); + fpu__initialize(fpu); + if (static_cpu_has(X86_FEATURE_FPU)) copy_init_fpstate_to_fpregs(); - preempt_enable(); - } } /* + * Load FPU context before returning to userspace. + */ +void switch_fpu_return(void) +{ + if (!static_cpu_has(X86_FEATURE_FPU)) + return; + + __fpregs_load_activate(); +} +EXPORT_SYMBOL_GPL(switch_fpu_return); + +#ifdef CONFIG_X86_DEBUG_FPU +/* + * If current FPU state according to its tracking (loaded FPU context on this + * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is + * loaded on return to userland. + */ +void fpregs_assert_state_consistent(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + return; + + WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); +} +EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); +#endif + +void fpregs_mark_activate(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + fpregs_activate(fpu); + fpu->last_cpu = smp_processor_id(); + clear_thread_flag(TIF_NEED_FPU_LOAD); +} +EXPORT_SYMBOL_GPL(fpregs_mark_activate); + +/* * x87 math exception handling: */ diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6abd83572b01..20d8fa7124c7 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -239,8 +239,6 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - - WARN_ON_FPU(current->thread.fpu.initialized); } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index bc02f5144b95..d652b939ccfb 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -15,16 +15,12 @@ */ int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { - struct fpu *target_fpu = &target->thread.fpu; - - return target_fpu->initialized ? regset->n : 0; + return regset->n; } int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { - struct fpu *target_fpu = &target->thread.fpu; - - if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized) + if (boot_cpu_has(X86_FEATURE_FXSR)) return regset->n; else return 0; @@ -269,11 +265,10 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } -void convert_to_fxsr(struct task_struct *tsk, +void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env) { - struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -350,7 +345,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); if (!ret) - convert_to_fxsr(target, &env); + convert_to_fxsr(&target->thread.fpu.state.fxsave, &env); /* * update the header bit in the xsave header, indicating the @@ -371,16 +366,9 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) { struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; - int fpvalid; - - fpvalid = fpu->initialized; - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - ufpu, NULL); - return fpvalid; + return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), + ufpu, NULL); } EXPORT_SYMBOL(dump_fpu); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index f6a1d299627c..5a8d118bc423 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -92,13 +92,13 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) return err; err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 *)(buf + fpu_user_xstate_size)); + (__u32 __user *)(buf + fpu_user_xstate_size)); /* * Read the xfeatures which we copied (directly from the cpu or * from the state in task struct) to the user buffers. */ - err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); + err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures); /* * For legacy compatible, we always set FP/SSE bits in the bit @@ -113,7 +113,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) */ xfeatures |= XFEATURE_MASK_FPSSE; - err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); + err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); return err; } @@ -144,9 +144,10 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * - * If the fpu, extended register state is live, save the state directly - * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, - * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * Try to save it directly to the user frame with disabled page fault handler. + * If this fails then do the slow path where the FPU state is first saved to + * task's fpu->state and then copy it to the user frame pointed to by the + * aligned pointer 'buf_fx'. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. @@ -156,10 +157,9 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { - struct fpu *fpu = ¤t->thread.fpu; - struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); + int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -172,28 +172,34 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpu->initialized || using_compacted_format()) { - /* Save the live register state to the user directly. */ - if (copy_fpregs_to_sigframe(buf_fx)) - return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - copy_fxregs_to_kernel(fpu); - } else { - /* - * It is a *bug* if kernel uses compacted-format for xsave - * area and we copy it out directly to a signal frame. It - * should have been handled above by saving the registers - * directly. - */ - if (boot_cpu_has(X86_FEATURE_XSAVES)) { - WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); - return -1; - } - - fpstate_sanitize_xstate(fpu); - if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) - return -1; +retry: + /* + * Load the FPU registers if they are not valid for the current task. + * With a valid FPU state we can attempt to save the state directly to + * userland's stack frame which will likely succeed. If it does not, + * resolve the fault in the user memory and try again. + */ + fpregs_lock(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + __fpregs_load_activate(); + + pagefault_disable(); + ret = copy_fpregs_to_sigframe(buf_fx); + pagefault_enable(); + fpregs_unlock(); + + if (ret) { + int aligned_size; + int nr_pages; + + aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size; + nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE); + + ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages, + NULL, FOLL_WRITE); + if (ret == nr_pages) + goto retry; + return -EFAULT; } /* Save the fsave header for the 32-bit frames. */ @@ -207,11 +213,11 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) } static inline void -sanitize_restored_xstate(struct task_struct *tsk, +sanitize_restored_xstate(union fpregs_state *state, struct user_i387_ia32_struct *ia32_env, u64 xfeatures, int fx_only) { - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct xregs_state *xsave = &state->xsave; struct xstate_header *header = &xsave->header; if (use_xsave()) { @@ -238,17 +244,18 @@ sanitize_restored_xstate(struct task_struct *tsk, */ xsave->i387.mxcsr &= mxcsr_feature_mask; - convert_to_fxsr(tsk, ia32_env); + if (ia32_env) + convert_to_fxsr(&state->fxsave, ia32_env); } } /* * Restore the extended state if present. Otherwise, restore the FP/SSE state. */ -static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) +static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) { if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { + if (fx_only) { u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); return copy_user_to_fxregs(buf); @@ -266,12 +273,15 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) { + struct user_i387_ia32_struct *envp = NULL; + int state_size = fpu_kernel_xstate_size; int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = fpu_kernel_xstate_size; + struct user_i387_ia32_struct env; u64 xfeatures = 0; int fx_only = 0; + int ret = 0; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -284,8 +294,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(buf, size)) return -EACCES; - fpu__initialize(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), @@ -308,61 +316,101 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } } + /* + * The current state of the FPU registers does not matter. By setting + * TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate + * is not modified on context switch and that the xstate is considered + * to be loaded again on return to userland (overriding last_cpu avoids + * the optimisation). + */ + set_thread_flag(TIF_NEED_FPU_LOAD); + __fpu_invalidate_fpregs_state(fpu); + + if ((unsigned long)buf_fx % 64) + fx_only = 1; + /* + * For 32-bit frames with fxstate, copy the fxstate so it can be + * reconstructed later. + */ if (ia32_fxstate) { + ret = __copy_from_user(&env, buf, sizeof(env)); + if (ret) + goto err_out; + envp = &env; + } else { /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Validate and sanitize the copied state. + * Attempt to restore the FPU registers directly from user + * memory. For that to succeed, the user access cannot cause + * page faults. If it does, fall back to the slow path below, + * going through the kernel buffer with the enabled pagefault + * handler. */ - struct user_i387_ia32_struct env; - int err = 0; + fpregs_lock(); + pagefault_disable(); + ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only); + pagefault_enable(); + if (!ret) { + fpregs_mark_activate(); + fpregs_unlock(); + return 0; + } + fpregs_unlock(); + } - /* - * Drop the current fpu which clears fpu->initialized. This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * fpu->initialized is again set. - */ - fpu__drop(fpu); + + if (use_xsave() && !fx_only) { + u64 init_bv = xfeatures_mask & ~xfeatures; if (using_compacted_format()) { - err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); } else { - err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); + ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); - if (!err && state_size > offsetof(struct xregs_state, header)) - err = validate_xstate_header(&fpu->state.xsave.header); + if (!ret && state_size > offsetof(struct xregs_state, header)) + ret = validate_xstate_header(&fpu->state.xsave.header); } + if (ret) + goto err_out; - if (err || __copy_from_user(&env, buf, sizeof(env))) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - err = -1; - } else { - sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); + + fpregs_lock(); + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures); + + } else if (use_fxsr()) { + ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); + if (ret) { + ret = -EFAULT; + goto err_out; } - local_bh_disable(); - fpu->initialized = 1; - fpu__restore(fpu); - local_bh_enable(); + sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); - return err; - } else { - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - user_fpu_begin(); - if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { - fpu__clear(fpu); - return -1; + fpregs_lock(); + if (use_xsave()) { + u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); } + + ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave); + } else { + ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); + if (ret) + goto err_out; + + fpregs_lock(); + ret = copy_kernel_to_fregs_err(&fpu->state.fsave); } + if (!ret) + fpregs_mark_activate(); + fpregs_unlock(); - return 0; +err_out: + if (ret) + fpu__clear(fpu); + return ret; } static inline int xstate_sigframe_size(void) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d7432c2b1051..9c459fd1d38e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -805,20 +805,18 @@ void fpu__resume_cpu(void) } /* - * Given an xstate feature mask, calculate where in the xsave + * Given an xstate feature nr, calculate where in the xsave * buffer the state is. Callers should ensure that the buffer * is valid. */ -static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) +static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { - int feature_nr = fls64(xstate_feature_mask) - 1; - - if (!xfeature_enabled(feature_nr)) { + if (!xfeature_enabled(xfeature_nr)) { WARN_ON_FPU(1); return NULL; } - return (void *)xsave + xstate_comp_offsets[feature_nr]; + return (void *)xsave + xstate_comp_offsets[xfeature_nr]; } /* * Given the xsave area and a state inside, this function returns the @@ -832,13 +830,13 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask * * Inputs: * xstate: the thread's storage area for all FPU data - * xstate_feature: state which is defined in xsave.h (e.g. - * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...) + * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. */ -void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { /* * Do we even *have* xsave state? @@ -851,11 +849,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * have not enabled. Remember that pcntxt_mask is * what we write to the XCR0 register. */ - WARN_ONCE(!(xfeatures_mask & xstate_feature), + WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)), "get of unsupported state"); /* * This assumes the last 'xsave*' instruction to - * have requested that 'xstate_feature' be saved. + * have requested that 'xfeature_nr' be saved. * If it did not, we might be seeing and old value * of the field in the buffer. * @@ -864,10 +862,10 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * or because the "init optimization" caused it * to not be saved. */ - if (!(xsave->header.xfeatures & xstate_feature)) + if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) return NULL; - return __raw_xsave_addr(xsave, xstate_feature); + return __raw_xsave_addr(xsave, xfeature_nr); } EXPORT_SYMBOL_GPL(get_xsave_addr); @@ -882,25 +880,23 @@ EXPORT_SYMBOL_GPL(get_xsave_addr); * Note that this only works on the current task. * * Inputs: - * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, - * XFEATURE_MASK_SSE, etc...) + * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area or NULL if the state * is not present or is in its 'init state'. */ -const void *get_xsave_field_ptr(int xsave_state) +const void *get_xsave_field_ptr(int xfeature_nr) { struct fpu *fpu = ¤t->thread.fpu; - if (!fpu->initialized) - return NULL; /* * fpu__save() takes the CPU's xstate registers * and saves them off to the 'fpu memory buffer. */ fpu__save(fpu); - return get_xsave_addr(&fpu->state.xsave, xsave_state); + return get_xsave_addr(&fpu->state.xsave, xfeature_nr); } #ifdef CONFIG_ARCH_HAS_PKEYS @@ -1016,7 +1012,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, 1 << i); + void *src = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1102,7 +1098,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, 1 << i); + void *src = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1159,7 +1155,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) u64 mask = ((u64)1 << i); if (hdr.xfeatures & mask) { - void *dst = __raw_xsave_addr(xsave, 1 << i); + void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1213,7 +1209,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) u64 mask = ((u64)1 << i); if (hdr.xfeatures & mask) { - void *dst = __raw_xsave_addr(xsave, 1 << i); + void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index e47cd9390ab4..85de790583f9 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -3,6 +3,7 @@ * Copyright (C) 2018 IBM Corporation */ #include <linux/efi.h> +#include <linux/module.h> #include <linux/ima.h> extern struct boot_params boot_params; @@ -64,12 +65,19 @@ static const char * const sb_arch_rules[] = { "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig", #endif /* CONFIG_KEXEC_VERIFY_SIG */ "measure func=KEXEC_KERNEL_CHECK", +#if !IS_ENABLED(CONFIG_MODULE_SIG) + "appraise func=MODULE_CHECK appraise_type=imasig", +#endif + "measure func=MODULE_CHECK", NULL }; const char * const *arch_get_ima_policy(void) { - if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) + if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { + if (IS_ENABLED(CONFIG_MODULE_SIG)) + set_module_sig_enforced(); return sb_arch_rules; + } return NULL; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3755d0310026..05b09896cfaf 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -35,6 +35,7 @@ #include <asm/x86_init.h> #include <asm/reboot.h> #include <asm/cache.h> +#include <asm/nospec-branch.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> @@ -551,6 +552,9 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; + + if (user_mode(regs)) + mds_user_clear_cpu_buffers(); } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d460998ae828..dcd272dbd0a9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -51,14 +51,6 @@ int iommu_pass_through __read_mostly; extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; -/* Dummy device used for NULL arguments (normally ISA). */ -struct device x86_dma_fallback_dev = { - .init_name = "fallback device", - .coherent_dma_mask = ISA_DMA_BIT_MASK, - .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, -}; -EXPORT_SYMBOL(x86_dma_fallback_dev); - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; @@ -77,18 +69,6 @@ void __init pci_iommu_alloc(void) } } -bool arch_dma_alloc_attrs(struct device **dev) -{ - if (!*dev) - *dev = &x86_dma_fallback_dev; - - if (!is_device_dma_capable(*dev)) - return false; - return true; - -} -EXPORT_SYMBOL(arch_dma_alloc_attrs); - /* * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel * parameter documentation. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d1d312d012a6..75fea0d48c0e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -101,7 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.vm86 = NULL; #endif - return fpu__copy(&dst->thread.fpu, &src->thread.fpu); + return fpu__copy(dst, src); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 70933193878c..2399e910d109 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -241,7 +241,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - switch_fpu_prepare(prev_fpu, cpu); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -274,9 +275,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before fpu__restore(), so the TS bit is up - * to date. + * the GDT and LDT are properly updated. */ arch_end_context_switch(next_p); @@ -297,10 +296,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) lazy_load_gs(next->gs); - switch_fpu_finish(next_fpu, cpu); - this_cpu_write(current_task, next_p); + switch_fpu_finish(next_fpu); + /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 844a28b29967..f8e1af380cdf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -521,7 +521,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); - switch_fpu_prepare(prev_fpu, cpu); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -539,9 +540,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before - * loading segments that might reference them, and and it must - * be done before fpu__restore(), so the TS bit is up to - * date. + * loading segments that might reference them. */ arch_end_context_switch(next_p); @@ -569,14 +568,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) x86_fsgsbase_load(prev, next); - switch_fpu_finish(next_fpu, cpu); - /* * Switch the PDA and FPU contexts. */ this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); + switch_fpu_finish(next_fpu); + /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index dff90fb6a9af..364813cea647 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -205,7 +205,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ - put_user_ex(fpstate, &sc->fpstate); + put_user_ex(fpstate, (unsigned long __user *)&sc->fpstate); /* non-iBCS2 extensions.. */ put_user_ex(mask, &sc->oldmask); @@ -245,7 +245,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long sp = regs->sp; unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); - struct fpu *fpu = ¤t->thread.fpu; + int ret; /* redzone */ if (IS_ENABLED(CONFIG_X86_64)) @@ -264,11 +264,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = (unsigned long) ka->sa.sa_restorer; } - if (fpu->initialized) { - sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), - &buf_fx, &math_size); - *fpstate = (void __user *)sp; - } + sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), + &buf_fx, &math_size); + *fpstate = (void __user *)sp; sp = align_sigframe(sp - frame_size); @@ -280,8 +278,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (fpu->initialized && - copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) + ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size); + if (ret < 0) return (void __user *)-1L; return (void __user *)sp; @@ -574,7 +572,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, restorer = NULL; err |= -EFAULT; } - put_user_ex(restorer, &frame->pretcode); + put_user_ex(restorer, (unsigned long __user *)&frame->pretcode); } put_user_catch(err); err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, @@ -765,8 +763,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (fpu->initialized) - fpu__clear(fpu); + fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d26f9e9c3d83..7de466eb960b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -58,6 +58,7 @@ #include <asm/alternative.h> #include <asm/fpu/xstate.h> #include <asm/trace/mpx.h> +#include <asm/nospec-branch.h> #include <asm/mpx.h> #include <asm/vm86.h> #include <asm/umip.h> @@ -367,6 +368,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; + /* + * This situation can be triggered by userspace via + * modify_ldt(2) and the return does not take the regular + * user space exit, so a CPU buffer clear is required when + * MDS mitigation is enabled. + */ + mds_user_clear_cpu_buffers(); return; } #endif @@ -456,7 +464,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) * which is all zeros which indicates MPX was not * responsible for the exception. */ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) goto exit_trap; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 72fa955f4a15..fc042419e670 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,7 +27,6 @@ config KVM depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER - select ANON_INODES select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select IRQ_BYPASS_MANAGER diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index fd3951638ae4..bbbe611f0c49 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -410,7 +410,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bdca39829bc..08715034e315 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -140,7 +140,7 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t *table; struct page *page; - npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); + npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); /* Check if the user is doing something meaningless. */ if (unlikely(npages != 1)) return -EFAULT; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 406b558abfef..6b92eaf4a3b1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1805,7 +1805,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, return NULL; /* Pin the user virtual address. */ - npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); + npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); if (npinned != npages) { pr_err("SEV: Failure locking %lu pages.\n", npages); goto err; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0c955bb286ff..e1fa935a545f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6431,8 +6431,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); + else if (static_branch_unlikely(&mds_user_clear)) + mds_clear_cpu_buffers(); if (vcpu->arch.cr2 != read_cr2()) write_cr2(vcpu->arch.cr2); @@ -6500,7 +6503,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) */ if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = __read_pkru(); + vcpu->arch.pkru = rdpkru(); if (vcpu->arch.pkru != vmx->host_pkru) __write_pkru(vmx->host_pkru); } @@ -6668,8 +6671,8 @@ free_partial_vcpu: return ERR_PTR(err); } -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 22cc90baa67f..b9591abde62a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3681,15 +3681,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *src = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *src = get_xsave_addr(xsave, xfeature_nr); if (src) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(dest + offset, &vcpu->arch.pkru, sizeof(vcpu->arch.pkru)); else @@ -3697,7 +3697,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) } - valid -= feature; + valid -= xfeature_mask; } } @@ -3724,22 +3724,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *dest = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *dest = get_xsave_addr(xsave, xfeature_nr); if (dest) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(&vcpu->arch.pkru, src + offset, sizeof(vcpu->arch.pkru)); else memcpy(dest, src + offset, size); } - valid -= feature; + valid -= xfeature_mask; } } @@ -7908,6 +7908,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) wait_lapic_expire(vcpu); guest_enter_irqoff(); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); set_debugreg(vcpu->arch.eff_db[0], 0); @@ -8166,22 +8170,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - preempt_disable(); + fpregs_lock(); + copy_fpregs_to_fpstate(¤t->thread.fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); - preempt_enable(); + + fpregs_mark_activate(); + fpregs_unlock(); + trace_kvm_fpu(1); } /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - preempt_disable(); + fpregs_lock(); + copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); copy_kernel_to_fpregs(¤t->thread.fpu.state); - preempt_enable(); + + fpregs_mark_activate(); + fpregs_unlock(); + ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } @@ -8879,11 +8891,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, - XFEATURE_MASK_BNDREGS); + XFEATURE_BNDREGS); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, - XFEATURE_MASK_BNDCSR); + XFEATURE_BNDCSR); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); if (init_event) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9e2ba7e667f6..a873da6b46d6 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -113,9 +113,6 @@ void math_emulate(struct math_emu_info *info) unsigned long code_base = 0; unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor; - struct fpu *fpu = ¤t->thread.fpu; - - fpu__initialize(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 3c4568f8fb28..b0a2de8d2f9e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -145,7 +145,7 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) { - if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", + if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) show_stack_regs(regs); @@ -162,7 +162,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) { - if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", + if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) show_stack_regs(regs); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 92e4c4b85bba..fab095362c50 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -203,7 +203,7 @@ static __init int setup_hugepagesz(char *opt) } __setup("hugepagesz=", setup_hugepagesz); -#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) +#ifdef CONFIG_CONTIG_ALLOC static __init int gigantic_pages_init(void) { /* With compaction or CMA we can allocate gigantic pages at runtime */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 85c94f9a87f8..075e568098f2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -850,24 +850,25 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bccff68e3267..20d14254b686 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -777,11 +777,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock) + struct mhp_restrictions *restrictions) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -791,15 +791,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return add_pages(nid, start_pfn, nr_pages, restrictions); } #define PAGE_INUSE 0xFD @@ -1141,24 +1141,20 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true, NULL); } -int __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); struct zone *zone; - int ret; /* With altmap the first mapped page is offset from @start */ if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - WARN_ON_ONCE(ret); + __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); - - return ret; } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c805db6236b4..59726aaf4671 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) goto err_out; } /* get bndregs field from current task's xsave area */ - bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS); + bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; @@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void) * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void) const struct mpx_bndcsr *bndcsr; struct mm_struct *mm = current->mm; - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return -EINVAL; /* diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 047a77f6a10c..1dcfc91c8f0c 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -18,6 +18,7 @@ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/mmu_context.h> /* vma_pkey() */ +#include <asm/fpu/internal.h> /* init_fpstate */ int __execute_only_pkey(struct mm_struct *mm) { @@ -39,17 +40,12 @@ int __execute_only_pkey(struct mm_struct *mm) * dance to set PKRU if we do not need to. Check it * first and assume that if the execute-only pkey is * write-disabled that we do not have to set it - * ourselves. We need preempt off so that nobody - * can make fpregs inactive. + * ourselves. */ - preempt_disable(); if (!need_to_set_mm_pkey && - current->thread.fpu.initialized && !__pkru_allows_read(read_pkru(), execute_only_pkey)) { - preempt_enable(); return execute_only_pkey; } - preempt_enable(); /* * Set up PKRU so that it denies access for everything @@ -131,7 +127,6 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey * in the process's lifetime will not accidentally get access * to data which is pkey-protected later on. */ -static u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) | PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) | PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) | @@ -148,13 +143,6 @@ void copy_init_pkru_to_fpregs(void) { u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value); /* - * Any write to PKRU takes it out of the XSAVE 'init - * state' which increases context switch cost. Avoid - * writing 0 when PKRU was already 0. - */ - if (!init_pkru_value_snapshot && !read_pkru()) - return; - /* * Override the PKRU state that came from 'init_fpstate' * with the baseline from the process. */ @@ -174,6 +162,7 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf, static ssize_t init_pkru_write_file(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { + struct pkru_state *pk; char buf[32]; ssize_t len; u32 new_init_pkru; @@ -196,6 +185,10 @@ static ssize_t init_pkru_write_file(struct file *file, return -EINVAL; WRITE_ONCE(init_pkru_value, new_init_pkru); + pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); + if (!pk) + return -EINVAL; + pk->pkru = new_init_pkru; return count; } diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 0d9cdffce6ac..b29e82f190c7 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -117,6 +117,8 @@ static bool is_simm32(s64 value) #define IA32_JLE 0x7E #define IA32_JG 0x7F +#define COND_JMP_OPCODE_INVALID (0xFF) + /* * Map eBPF registers to IA32 32bit registers or stack scratch space. * @@ -698,19 +700,12 @@ static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog) STACK_VAR(dst_hi)); } - /* xor ecx,ecx */ - EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* sub dreg_lo,ecx */ - EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX)); - /* mov dreg_lo,ecx */ - EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); - - /* xor ecx,ecx */ - EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* sbb dreg_hi,ecx */ - EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX)); - /* mov dreg_hi,ecx */ - EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX)); + /* neg dreg_lo */ + EMIT2(0xF7, add_1reg(0xD8, dreg_lo)); + /* adc dreg_hi,0x0 */ + EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00); + /* neg dreg_hi */ + EMIT2(0xF7, add_1reg(0xD8, dreg_hi)); if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ @@ -1613,6 +1608,75 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog) *pprog = prog; } +static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo) +{ + u8 jmp_cond; + + /* Convert BPF opcode to x86 */ + switch (op) { + case BPF_JEQ: + jmp_cond = IA32_JE; + break; + case BPF_JSET: + case BPF_JNE: + jmp_cond = IA32_JNE; + break; + case BPF_JGT: + /* GT is unsigned '>', JA in x86 */ + jmp_cond = IA32_JA; + break; + case BPF_JLT: + /* LT is unsigned '<', JB in x86 */ + jmp_cond = IA32_JB; + break; + case BPF_JGE: + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = IA32_JAE; + break; + case BPF_JLE: + /* LE is unsigned '<=', JBE in x86 */ + jmp_cond = IA32_JBE; + break; + case BPF_JSGT: + if (!is_cmp_lo) + /* Signed '>', GT in x86 */ + jmp_cond = IA32_JG; + else + /* GT is unsigned '>', JA in x86 */ + jmp_cond = IA32_JA; + break; + case BPF_JSLT: + if (!is_cmp_lo) + /* Signed '<', LT in x86 */ + jmp_cond = IA32_JL; + else + /* LT is unsigned '<', JB in x86 */ + jmp_cond = IA32_JB; + break; + case BPF_JSGE: + if (!is_cmp_lo) + /* Signed '>=', GE in x86 */ + jmp_cond = IA32_JGE; + else + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = IA32_JAE; + break; + case BPF_JSLE: + if (!is_cmp_lo) + /* Signed '<=', LE in x86 */ + jmp_cond = IA32_JLE; + else + /* LE is unsigned '<=', JBE in x86 */ + jmp_cond = IA32_JBE; + break; + default: /* to silence GCC warning */ + jmp_cond = COND_JMP_OPCODE_INVALID; + break; + } + + return jmp_cond; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -2069,10 +2133,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JLE | BPF_X: - case BPF_JMP | BPF_JSGT | BPF_X: - case BPF_JMP | BPF_JSLE | BPF_X: - case BPF_JMP | BPF_JSLT | BPF_X: - case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP32 | BPF_JEQ | BPF_X: case BPF_JMP32 | BPF_JNE | BPF_X: case BPF_JMP32 | BPF_JGT | BPF_X: @@ -2118,6 +2178,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); goto emit_cond_jmp; } + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: { + u8 dreg_lo = dstk ? IA32_EAX : dst_lo; + u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 sreg_lo = sstk ? IA32_ECX : src_lo; + u8 sreg_hi = sstk ? IA32_EBX : src_hi; + + if (dstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), + STACK_VAR(dst_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); + } + + if (sstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), + STACK_VAR(src_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EBX), + STACK_VAR(src_hi)); + } + + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 10); + /* cmp dreg_lo,sreg_lo */ + EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); + goto emit_cond_jmp_signed; + } case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: { bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; @@ -2194,10 +2288,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JLE | BPF_K: - case BPF_JMP | BPF_JSGT | BPF_K: - case BPF_JMP | BPF_JSLE | BPF_K: - case BPF_JMP | BPF_JSLT | BPF_K: - case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JEQ | BPF_K: case BPF_JMP32 | BPF_JNE | BPF_K: case BPF_JMP32 | BPF_JGT | BPF_K: @@ -2238,50 +2328,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* cmp dreg_lo,sreg_lo */ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); -emit_cond_jmp: /* Convert BPF opcode to x86 */ - switch (BPF_OP(code)) { - case BPF_JEQ: - jmp_cond = IA32_JE; - break; - case BPF_JSET: - case BPF_JNE: - jmp_cond = IA32_JNE; - break; - case BPF_JGT: - /* GT is unsigned '>', JA in x86 */ - jmp_cond = IA32_JA; - break; - case BPF_JLT: - /* LT is unsigned '<', JB in x86 */ - jmp_cond = IA32_JB; - break; - case BPF_JGE: - /* GE is unsigned '>=', JAE in x86 */ - jmp_cond = IA32_JAE; - break; - case BPF_JLE: - /* LE is unsigned '<=', JBE in x86 */ - jmp_cond = IA32_JBE; - break; - case BPF_JSGT: - /* Signed '>', GT in x86 */ - jmp_cond = IA32_JG; - break; - case BPF_JSLT: - /* Signed '<', LT in x86 */ - jmp_cond = IA32_JL; - break; - case BPF_JSGE: - /* Signed '>=', GE in x86 */ - jmp_cond = IA32_JGE; - break; - case BPF_JSLE: - /* Signed '<=', LE in x86 */ - jmp_cond = IA32_JLE; - break; - default: /* to silence GCC warning */ +emit_cond_jmp: jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false); + if (jmp_cond == COND_JMP_OPCODE_INVALID) return -EFAULT; - } jmp_offset = addrs[i + insn->off] - addrs[i]; if (is_imm8(jmp_offset)) { EMIT2(jmp_cond, jmp_offset); @@ -2291,7 +2340,66 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ pr_err("cond_jmp gen bug %llx\n", jmp_offset); return -EFAULT; } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: { + u8 dreg_lo = dstk ? IA32_EAX : dst_lo; + u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 sreg_lo = IA32_ECX; + u8 sreg_hi = IA32_EBX; + u32 hi; + if (dstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), + STACK_VAR(dst_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); + } + + /* mov ecx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); + hi = imm32 & (1 << 31) ? (u32)~0 : 0; + /* mov ebx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 10); + /* cmp dreg_lo,sreg_lo */ + EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); + + /* + * For simplicity of branch offset computation, + * let's use fixed jump coding here. + */ +emit_cond_jmp_signed: /* Check the condition for low 32-bit comparison */ + jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true); + if (jmp_cond == COND_JMP_OPCODE_INVALID) + return -EFAULT; + jmp_offset = addrs[i + insn->off] - addrs[i] + 8; + if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + EMIT2(0xEB, 6); + + /* Check the condition for high 32-bit comparison */ + jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false); + if (jmp_cond == COND_JMP_OPCODE_INVALID) + return -EFAULT; + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } break; } case BPF_JMP | BPF_JA: diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 52e55108404e..d3a73f9335e1 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1119,6 +1119,8 @@ static const struct dmi_system_id pciirq_dmi_table[] __initconst = { void __init pcibios_irq_init(void) { + struct irq_routing_table *rtable = NULL; + DBG(KERN_DEBUG "PCI: IRQ init\n"); if (raw_pci_ops == NULL) @@ -1129,8 +1131,10 @@ void __init pcibios_irq_init(void) pirq_table = pirq_find_routing_table(); #ifdef CONFIG_PCI_BIOS - if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) { pirq_table = pcibios_get_irq_routing_table(); + rtable = pirq_table; + } #endif if (pirq_table) { pirq_peer_trick(); @@ -1145,8 +1149,10 @@ void __init pcibios_irq_init(void) * If we're using the I/O APIC, avoid using the PCI IRQ * routing table */ - if (io_apic_assign_pci_irqs) + if (io_apic_assign_pci_irqs) { + kfree(rtable); pirq_table = NULL; + } } x86_init.pci.fixup_irqs(); diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index bcddf09b5aa3..4845b8c7be7f 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -90,7 +90,6 @@ static int get_e820_md5(struct e820_table *table, void *buf) } desc->tfm = tfm; - desc->flags = 0; size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry) * table->nr_entries; diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h index ef898af102d1..56a2f0913e3c 100644 --- a/arch/x86/um/asm/syscall.h +++ b/arch/x86/um/asm/syscall.h @@ -9,7 +9,7 @@ typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_X86_32 return AUDIT_ARCH_I386; diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 0766a08bdf45..07054572297f 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -105,7 +105,7 @@ void xen_mc_flush(void) for (i = 0; i < b->mcidx; i++) { if (b->entries[i].result < 0) { #if MC_DEBUG - pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n", + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pS\n", i + 1, b->debug[i].op, b->debug[i].args[0], |