From 18c3933c1983157df9dc06d4f3bfc764f161ed7a Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 10 Nov 2021 16:06:50 -0600 Subject: x86/sev: Shorten GHCB terminate macro names Shorten macro names for improved readability. Suggested-by: Borislav Petkov Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lkml.kernel.org/r/20211110220731.2396491-5-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 6 +++--- arch/x86/include/asm/sev-common.h | 4 ++-- arch/x86/kernel/sev-shared.c | 2 +- arch/x86/kernel/sev.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 670e998fe930..28bcf04c022e 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -122,7 +122,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, static bool early_setup_sev_es(void) { if (!sev_es_negotiate_protocol()) - sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED); + sev_es_terminate(GHCB_SEV_ES_PROT_UNSUPPORTED); if (set_page_decrypted((unsigned long)&boot_ghcb_page)) return false; @@ -175,7 +175,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) enum es_result result; if (!boot_ghcb && !early_setup_sev_es()) - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + sev_es_terminate(GHCB_SEV_ES_GEN_REQ); vc_ghcb_invalidate(boot_ghcb); result = vc_init_em_ctxt(&ctxt, regs, exit_code); @@ -202,5 +202,5 @@ finish: if (result == ES_OK) vc_finish_insn(&ctxt); else if (result != ES_RETRY) - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + sev_es_terminate(GHCB_SEV_ES_GEN_REQ); } diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 2cef6c5a52c2..855b0ec9c4e8 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -68,8 +68,8 @@ (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \ 
((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS)) -#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 -#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 +#define GHCB_SEV_ES_GEN_REQ 0 +#define GHCB_SEV_ES_PROT_UNSUPPORTED 1 #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 787dc5f568b5..ce987688bbc0 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -221,7 +221,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) fail: /* Terminate the guest */ - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + sev_es_terminate(GHCB_SEV_ES_GEN_REQ); } static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 74f0ec955384..0a6c82e060e0 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1411,7 +1411,7 @@ DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) show_regs(regs); /* Ask hypervisor to sev_es_terminate */ - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + sev_es_terminate(GHCB_SEV_ES_GEN_REQ); /* If that fails and we get here - just panic */ panic("Returned from Terminate-Request to Hypervisor\n"); @@ -1459,7 +1459,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) /* Do initial setup or terminate the guest */ if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb())) - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + sev_es_terminate(GHCB_SEV_ES_GEN_REQ); vc_ghcb_invalidate(boot_ghcb); -- cgit v1.2.3-70-g09d2 From dbc4c70e3cdfe204a67dd66bed78709ee3000ec0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Nov 2021 16:06:51 -0600 Subject: x86/sev: Get rid of excessive use of defines Remove all the defines of masks and bit positions for the GHCB MSR protocol and use comments instead which correspond directly to the spec so that following those can be a lot easier and straightforward with the spec 
opened in parallel to the code. Align vertically while at it. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211110220731.2396491-6-brijesh.singh@amd.com --- arch/x86/include/asm/sev-common.h | 51 +++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 855b0ec9c4e8..aac44c3f839c 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -18,20 +18,19 @@ /* SEV Information Request/Response */ #define GHCB_MSR_SEV_INFO_RESP 0x001 #define GHCB_MSR_SEV_INFO_REQ 0x002 -#define GHCB_MSR_VER_MAX_POS 48 -#define GHCB_MSR_VER_MAX_MASK 0xffff -#define GHCB_MSR_VER_MIN_POS 32 -#define GHCB_MSR_VER_MIN_MASK 0xffff -#define GHCB_MSR_CBIT_POS 24 -#define GHCB_MSR_CBIT_MASK 0xff -#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ - ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ - (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ - (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + /* GHCBData[63:48] */ \ + ((((_max) & 0xffff) << 48) | \ + /* GHCBData[47:32] */ \ + (((_min) & 0xffff) << 32) | \ + /* GHCBData[31:24] */ \ + (((_cbit) & 0xff) << 24) | \ GHCB_MSR_SEV_INFO_RESP) + #define GHCB_MSR_INFO(v) ((v) & 0xfffUL) -#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) -#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> 48) & 0xffff) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> 32) & 0xffff) /* CPUID Request/Response */ #define GHCB_MSR_CPUID_REQ 0x004 @@ -46,27 +45,33 @@ #define GHCB_CPUID_REQ_EBX 1 #define GHCB_CPUID_REQ_ECX 2 #define GHCB_CPUID_REQ_EDX 3 -#define GHCB_CPUID_REQ(fn, reg) \ - (GHCB_MSR_CPUID_REQ | \ - (((unsigned long)reg & 
GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ - (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) +#define GHCB_CPUID_REQ(fn, reg) \ + /* GHCBData[11:0] */ \ + (GHCB_MSR_CPUID_REQ | \ + /* GHCBData[31:12] */ \ + (((unsigned long)(reg) & 0x3) << 30) | \ + /* GHCBData[63:32] */ \ + (((unsigned long)fn) << 32)) /* AP Reset Hold */ -#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 -#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 /* GHCB Hypervisor Feature Request/Response */ -#define GHCB_MSR_HV_FT_REQ 0x080 -#define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_REQ 0x080 +#define GHCB_MSR_HV_FT_RESP 0x081 #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf #define GHCB_MSR_TERM_REASON_POS 16 #define GHCB_MSR_TERM_REASON_MASK 0xff -#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ - (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \ - ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS)) + +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + /* GHCBData[15:12] */ \ + (((((u64)reason_set) & 0xf) << 12) | \ + /* GHCBData[23:16] */ \ + ((((u64)reason_val) & 0xff) << 16)) #define GHCB_SEV_ES_GEN_REQ 0 #define GHCB_SEV_ES_PROT_UNSUPPORTED 1 -- cgit v1.2.3-70-g09d2 From 5ed0a99b12aa2dd09afe7ba485145529b89f26e6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Nov 2021 16:06:52 -0600 Subject: x86/head64: Carve out the guest encryption postprocessing into a helper Carve it out so that it is abstracted out of the main boot path. All other encrypted guest-relevant processing should be placed in there. No functional changes. 
Signed-off-by: Borislav Petkov Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211110220731.2396491-7-brijesh.singh@amd.com --- arch/x86/kernel/head64.c | 60 +++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index fc5371a7e9d1..3be9dd213dad 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -126,6 +126,36 @@ static bool __head check_la57_support(unsigned long physaddr) } #endif +static unsigned long sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) +{ + unsigned long vaddr, vaddr_end; + int i; + + /* Encrypt the kernel and related (if SME is active) */ + sme_encrypt_kernel(bp); + + /* + * Clear the memory encryption mask from the .bss..decrypted section. + * The bss section will be memset to zero later in the initialization so + * there is no need to zero it after changing the memory encryption + * attribute. + */ + if (sme_get_me_mask()) { + vaddr = (unsigned long)__start_bss_decrypted; + vaddr_end = (unsigned long)__end_bss_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + i = pmd_index(vaddr); + pmd[i] -= sme_get_me_mask(); + } + } + + /* + * Return the SME encryption mask (if SME is active) to be used as a + * modifier for the initial pgdir entry programmed into CR3. + */ + return sme_get_me_mask(); +} + /* Code in __startup_64() can be relocated during execution, but the compiler * doesn't have to generate PC-relative relocations when accessing globals from * that function. 
Clang actually does not generate them, which leads to @@ -135,7 +165,6 @@ static bool __head check_la57_support(unsigned long physaddr) unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { - unsigned long vaddr, vaddr_end; unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; @@ -276,34 +305,7 @@ unsigned long __head __startup_64(unsigned long physaddr, */ *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask(); - /* Encrypt the kernel and related (if SME is active) */ - sme_encrypt_kernel(bp); - - /* - * Clear the memory encryption mask from the .bss..decrypted section. - * The bss section will be memset to zero later in the initialization so - * there is no need to zero it after changing the memory encryption - * attribute. - * - * This is early code, use an open coded check for SME instead of - * using cc_platform_has(). This eliminates worries about removing - * instrumentation or checking boot_cpu_data in the cc_platform_has() - * function. - */ - if (sme_get_me_mask()) { - vaddr = (unsigned long)__start_bss_decrypted; - vaddr_end = (unsigned long)__end_bss_decrypted; - for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { - i = pmd_index(vaddr); - pmd[i] -= sme_get_me_mask(); - } - } - - /* - * Return the SME encryption mask (if SME is active) to be used as a - * modifier for the initial pgdir entry programmed into CR3. - */ - return sme_get_me_mask(); + return sme_postprocess_startup(bp, pmd); } unsigned long __startup_secondary_64(void) -- cgit v1.2.3-70-g09d2 From 75cc9a84c9eb36e436e3fcee5158fe31d1dfd78f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Nov 2021 16:06:53 -0600 Subject: x86/sev: Remove do_early_exception() forward declarations There's a perfectly fine prototype in the asm/setup.h header. Use it. No functional changes. 
Signed-off-by: Borislav Petkov Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211110220731.2396491-8-brijesh.singh@amd.com --- arch/x86/kernel/sev.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 0a6c82e060e0..03f9aff9d1f7 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -86,9 +87,6 @@ struct ghcb_state { static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); -/* Needed in vc_early_forward_exception */ -void do_early_exception(struct pt_regs *regs, int trapnr); - static __always_inline bool on_vc_stack(struct pt_regs *regs) { unsigned long sp = regs->sp; @@ -209,9 +207,6 @@ static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) return ghcb; } -/* Needed in vc_early_forward_exception */ -void do_early_exception(struct pt_regs *regs, int trapnr); - static inline u64 sev_es_rd_ghcb_msr(void) { return __rdmsr(MSR_AMD64_SEV_ES_GHCB); -- cgit v1.2.3-70-g09d2 From 8260b9820f7050461b8969305bbd8cb5654f0c74 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 6 Dec 2021 16:55:03 +0300 Subject: x86/sev: Use CC_ATTR attribute to generalize string I/O unroll INS/OUTS are not supported in TDX guests and cause #UD. Kernel has to avoid them when running in TDX guest. To support existing usage, string I/O operations are unrolled using IN/OUT instructions. AMD SEV platform implements this support by adding unroll logic in ins#bwl()/outs#bwl() macros with SEV-specific checks. Since TDX VM guests will also need similar support, use CC_ATTR_GUEST_UNROLL_STRING_IO and generic cc_platform_has() API to implement it. String I/O helpers were the last users of sev_key_active() interface and sev_enable_key static key. Remove them. [ bp: Move comment too and do not delete it. 
] Suggested-by: Tom Lendacky Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Tom Lendacky Tested-by: Tom Lendacky Link: https://lkml.kernel.org/r/20211206135505.75045-2-kirill.shutemov@linux.intel.com --- arch/x86/include/asm/io.h | 20 +++----------------- arch/x86/kernel/cc_platform.c | 8 ++++++++ arch/x86/mm/mem_encrypt.c | 10 ---------- include/linux/cc_platform.h | 11 +++++++++++ 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 5c6a4af0b911..f6d91ecb8026 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -256,21 +257,6 @@ static inline void slow_down_io(void) #endif -#ifdef CONFIG_AMD_MEM_ENCRYPT -#include - -extern struct static_key_false sev_enable_key; -static inline bool sev_key_active(void) -{ - return static_branch_unlikely(&sev_enable_key); -} - -#else /* !CONFIG_AMD_MEM_ENCRYPT */ - -static inline bool sev_key_active(void) { return false; } - -#endif /* CONFIG_AMD_MEM_ENCRYPT */ - #define BUILDIO(bwl, bw, type) \ static inline void out##bwl(unsigned type value, int port) \ { \ @@ -301,7 +287,7 @@ static inline unsigned type in##bwl##_p(int port) \ \ static inline void outs##bwl(int port, const void *addr, unsigned long count) \ { \ - if (sev_key_active()) { \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ unsigned type *value = (unsigned type *)addr; \ while (count) { \ out##bwl(*value, port); \ @@ -317,7 +303,7 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \ \ static inline void ins##bwl(int port, void *addr, unsigned long count) \ { \ - if (sev_key_active()) { \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ unsigned type *value = (unsigned type *)addr; \ while (count) { \ *value = in##bwl(port); \ diff --git 
a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c index 03bb2f343ddb..8a25b1c0d480 100644 --- a/arch/x86/kernel/cc_platform.c +++ b/arch/x86/kernel/cc_platform.c @@ -50,6 +50,14 @@ static bool amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_STATE_ENCRYPT: return sev_status & MSR_AMD64_SEV_ES_ENABLED; + /* + * With SEV, the rep string I/O instructions need to be unrolled + * but SEV-ES supports them through the #VC handler. + */ + case CC_ATTR_GUEST_UNROLL_STRING_IO: + return (sev_status & MSR_AMD64_SEV_ENABLED) && + !(sev_status & MSR_AMD64_SEV_ES_ENABLED); + default: return false; } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 35487305d8af..b520021a7e7b 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -43,8 +43,6 @@ u64 sme_me_mask __section(".data") = 0; u64 sev_status __section(".data") = 0; u64 sev_check_data __section(".data") = 0; EXPORT_SYMBOL(sme_me_mask); -DEFINE_STATIC_KEY_FALSE(sev_enable_key); -EXPORT_SYMBOL_GPL(sev_enable_key); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE); @@ -499,14 +497,6 @@ void __init mem_encrypt_init(void) /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ swiotlb_update_mem_attributes(); - /* - * With SEV, we need to unroll the rep string I/O instructions, - * but SEV-ES supports them through the #VC handler. - */ - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && - !cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) - static_branch_enable(&sev_enable_key); - print_mem_encrypt_feature_info(); } diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index a075b70b9a70..efd8205282da 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -61,6 +61,17 @@ enum cc_attr { * Examples include SEV-ES. 
*/ CC_ATTR_GUEST_STATE_ENCRYPT, + + /** + * @CC_ATTR_GUEST_UNROLL_STRING_IO: String I/O is implemented with + * IN/OUT instructions + * + * The platform/OS is running as a guest/virtual machine and uses + * IN/OUT instructions in place of string I/O. + * + * Examples include TDX guest & SEV. + */ + CC_ATTR_GUEST_UNROLL_STRING_IO, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM -- cgit v1.2.3-70-g09d2 From dbca5e1a04f8b30aea4e2c91e5045ee6e7c3ef43 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 6 Dec 2021 16:55:04 +0300 Subject: x86/sev: Rename mem_encrypt.c to mem_encrypt_amd.c Both Intel TDX and AMD SEV implement memory encryption features. But the bulk of the code in mem_encrypt.c is AMD-specific. Rename the file to mem_encrypt_amd.c. A subsequent patch will extract the parts that can be shared by both TDX and AMD SEV/SME into a generic file. No functional changes. Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Tom Lendacky Tested-by: Tom Lendacky Link: https://lore.kernel.org/r/20211206135505.75045-3-kirill.shutemov@linux.intel.com --- arch/x86/mm/Makefile | 8 +- arch/x86/mm/mem_encrypt.c | 507 ------------------------------------------ arch/x86/mm/mem_encrypt_amd.c | 507 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 511 insertions(+), 511 deletions(-) delete mode 100644 arch/x86/mm/mem_encrypt.c create mode 100644 arch/x86/mm/mem_encrypt_amd.c diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 5864219221ca..c9c480641153 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 # Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c KCOV_INSTRUMENT_tlb.o := n -KCOV_INSTRUMENT_mem_encrypt.o := n +KCOV_INSTRUMENT_mem_encrypt_amd.o := n KCOV_INSTRUMENT_mem_encrypt_identity.o := n -KASAN_SANITIZE_mem_encrypt.o := n +KASAN_SANITIZE_mem_encrypt_amd.o := n 
KASAN_SANITIZE_mem_encrypt_identity.o := n # Disable KCSAN entirely, because otherwise we get warnings that some functions @@ -12,7 +12,7 @@ KASAN_SANITIZE_mem_encrypt_identity.o := n KCSAN_SANITIZE := n ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_mem_encrypt.o = -pg +CFLAGS_REMOVE_mem_encrypt_amd.o = -pg CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif @@ -52,6 +52,6 @@ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o +obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c deleted file mode 100644 index b520021a7e7b..000000000000 --- a/arch/x86/mm/mem_encrypt.c +++ /dev/null @@ -1,507 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AMD Memory Encryption Support - * - * Copyright (C) 2016 Advanced Micro Devices, Inc. - * - * Author: Tom Lendacky - */ - -#define DISABLE_BRANCH_PROFILING - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mm_internal.h" - -/* - * Since SME related variables are set early in the boot process they must - * reside in the .data section so as not to be zeroed out when the .bss - * section is later cleared. - */ -u64 sme_me_mask __section(".data") = 0; -u64 sev_status __section(".data") = 0; -u64 sev_check_data __section(".data") = 0; -EXPORT_SYMBOL(sme_me_mask); - -/* Buffer used for early in-place encryption by BSP, no locking needed */ -static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE); - -/* - * This routine does not change the underlying encryption setting of the - * page(s) that map this memory. 
It assumes that eventually the memory is - * meant to be accessed as either encrypted or decrypted but the contents - * are currently not in the desired state. - * - * This routine follows the steps outlined in the AMD64 Architecture - * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place. - */ -static void __init __sme_early_enc_dec(resource_size_t paddr, - unsigned long size, bool enc) -{ - void *src, *dst; - size_t len; - - if (!sme_me_mask) - return; - - wbinvd(); - - /* - * There are limited number of early mapping slots, so map (at most) - * one page at time. - */ - while (size) { - len = min_t(size_t, sizeof(sme_early_buffer), size); - - /* - * Create mappings for the current and desired format of - * the memory. Use a write-protected mapping for the source. - */ - src = enc ? early_memremap_decrypted_wp(paddr, len) : - early_memremap_encrypted_wp(paddr, len); - - dst = enc ? early_memremap_encrypted(paddr, len) : - early_memremap_decrypted(paddr, len); - - /* - * If a mapping can't be obtained to perform the operation, - * then eventual access of that area in the desired mode - * will cause a crash. - */ - BUG_ON(!src || !dst); - - /* - * Use a temporary buffer, of cache-line multiple size, to - * avoid data corruption as documented in the APM. 
- */ - memcpy(sme_early_buffer, src, len); - memcpy(dst, sme_early_buffer, len); - - early_memunmap(dst, len); - early_memunmap(src, len); - - paddr += len; - size -= len; - } -} - -void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) -{ - __sme_early_enc_dec(paddr, size, true); -} - -void __init sme_early_decrypt(resource_size_t paddr, unsigned long size) -{ - __sme_early_enc_dec(paddr, size, false); -} - -static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size, - bool map) -{ - unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET; - pmdval_t pmd_flags, pmd; - - /* Use early_pmd_flags but remove the encryption mask */ - pmd_flags = __sme_clr(early_pmd_flags); - - do { - pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0; - __early_make_pgtable((unsigned long)vaddr, pmd); - - vaddr += PMD_SIZE; - paddr += PMD_SIZE; - size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE; - } while (size); - - flush_tlb_local(); -} - -void __init sme_unmap_bootdata(char *real_mode_data) -{ - struct boot_params *boot_data; - unsigned long cmdline_paddr; - - if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) - return; - - /* Get the command line address before unmapping the real_mode_data */ - boot_data = (struct boot_params *)real_mode_data; - cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32); - - __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false); - - if (!cmdline_paddr) - return; - - __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false); -} - -void __init sme_map_bootdata(char *real_mode_data) -{ - struct boot_params *boot_data; - unsigned long cmdline_paddr; - - if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) - return; - - __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true); - - /* Get the command line address after mapping the real_mode_data */ - boot_data = (struct boot_params *)real_mode_data; - cmdline_paddr = boot_data->hdr.cmd_line_ptr | 
((u64)boot_data->ext_cmd_line_ptr << 32); - - if (!cmdline_paddr) - return; - - __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); -} - -void __init sme_early_init(void) -{ - unsigned int i; - - if (!sme_me_mask) - return; - - early_pmd_flags = __sme_set(early_pmd_flags); - - __supported_pte_mask = __sme_set(__supported_pte_mask); - - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; -} - -void __init sev_setup_arch(void) -{ - phys_addr_t total_mem = memblock_phys_mem_size(); - unsigned long size; - - if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - return; - - /* - * For SEV, all DMA has to occur via shared/unencrypted pages. - * SEV uses SWIOTLB to make this happen without changing device - * drivers. However, depending on the workload being run, the - * default 64MB of SWIOTLB may not be enough and SWIOTLB may - * run out of buffers for DMA, resulting in I/O errors and/or - * performance degradation especially with high I/O workloads. - * - * Adjust the default size of SWIOTLB for SEV guests using - * a percentage of guest memory for SWIOTLB buffers. - * Also, as the SWIOTLB bounce buffer memory is allocated - * from low memory, ensure that the adjusted size is within - * the limits of low available memory. 
- * - * The percentage of guest memory used here for SWIOTLB buffers - * is more of an approximation of the static adjustment which - * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6% - */ - size = total_mem * 6 / 100; - size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G); - swiotlb_adjust_size(size); -} - -static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) -{ - unsigned long pfn = 0; - pgprot_t prot; - - switch (level) { - case PG_LEVEL_4K: - pfn = pte_pfn(*kpte); - prot = pte_pgprot(*kpte); - break; - case PG_LEVEL_2M: - pfn = pmd_pfn(*(pmd_t *)kpte); - prot = pmd_pgprot(*(pmd_t *)kpte); - break; - case PG_LEVEL_1G: - pfn = pud_pfn(*(pud_t *)kpte); - prot = pud_pgprot(*(pud_t *)kpte); - break; - default: - WARN_ONCE(1, "Invalid level for kpte\n"); - return 0; - } - - if (ret_prot) - *ret_prot = prot; - - return pfn; -} - -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) -{ -#ifdef CONFIG_PARAVIRT - unsigned long sz = npages << PAGE_SHIFT; - unsigned long vaddr_end = vaddr + sz; - - while (vaddr < vaddr_end) { - int psize, pmask, level; - unsigned long pfn; - pte_t *kpte; - - kpte = lookup_address(vaddr, &level); - if (!kpte || pte_none(*kpte)) { - WARN_ONCE(1, "kpte lookup for vaddr\n"); - return; - } - - pfn = pg_level_to_pfn(level, kpte, NULL); - if (!pfn) - continue; - - psize = page_level_size(level); - pmask = page_level_mask(level); - - notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc); - - vaddr = (vaddr & pmask) + psize; - } -#endif -} - -static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) -{ - pgprot_t old_prot, new_prot; - unsigned long pfn, pa, size; - pte_t new_pte; - - pfn = pg_level_to_pfn(level, kpte, &old_prot); - if (!pfn) - return; - - new_prot = old_prot; - if (enc) - pgprot_val(new_prot) |= _PAGE_ENC; - else - pgprot_val(new_prot) &= ~_PAGE_ENC; - - /* If prot is same then do nothing. 
*/ - if (pgprot_val(old_prot) == pgprot_val(new_prot)) - return; - - pa = pfn << PAGE_SHIFT; - size = page_level_size(level); - - /* - * We are going to perform in-place en-/decryption and change the - * physical page attribute from C=1 to C=0 or vice versa. Flush the - * caches to ensure that data gets accessed with the correct C-bit. - */ - clflush_cache_range(__va(pa), size); - - /* Encrypt/decrypt the contents in-place */ - if (enc) - sme_early_encrypt(pa, size); - else - sme_early_decrypt(pa, size); - - /* Change the page encryption mask. */ - new_pte = pfn_pte(pfn, new_prot); - set_pte_atomic(kpte, new_pte); -} - -static int __init early_set_memory_enc_dec(unsigned long vaddr, - unsigned long size, bool enc) -{ - unsigned long vaddr_end, vaddr_next, start; - unsigned long psize, pmask; - int split_page_size_mask; - int level, ret; - pte_t *kpte; - - start = vaddr; - vaddr_next = vaddr; - vaddr_end = vaddr + size; - - for (; vaddr < vaddr_end; vaddr = vaddr_next) { - kpte = lookup_address(vaddr, &level); - if (!kpte || pte_none(*kpte)) { - ret = 1; - goto out; - } - - if (level == PG_LEVEL_4K) { - __set_clr_pte_enc(kpte, level, enc); - vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE; - continue; - } - - psize = page_level_size(level); - pmask = page_level_mask(level); - - /* - * Check whether we can change the large page in one go. - * We request a split when the address is not aligned and - * the number of pages to set/clear encryption bit is smaller - * than the number of pages in the large page. - */ - if (vaddr == (vaddr & pmask) && - ((vaddr_end - vaddr) >= psize)) { - __set_clr_pte_enc(kpte, level, enc); - vaddr_next = (vaddr & pmask) + psize; - continue; - } - - /* - * The virtual address is part of a larger page, create the next - * level page table mapping (4K or 2M). If it is part of a 2M - * page then we request a split of the large page into 4K - * chunks. A 1GB large page is split into 2M pages, resp. 
- */ - if (level == PG_LEVEL_2M) - split_page_size_mask = 0; - else - split_page_size_mask = 1 << PG_LEVEL_2M; - - /* - * kernel_physical_mapping_change() does not flush the TLBs, so - * a TLB flush is required after we exit from the for loop. - */ - kernel_physical_mapping_change(__pa(vaddr & pmask), - __pa((vaddr_end & pmask) + psize), - split_page_size_mask); - } - - ret = 0; - - notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); -out: - __flush_tlb_all(); - return ret; -} - -int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) -{ - return early_set_memory_enc_dec(vaddr, size, false); -} - -int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) -{ - return early_set_memory_enc_dec(vaddr, size, true); -} - -void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) -{ - notify_range_enc_status_changed(vaddr, npages, enc); -} - -/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ -bool force_dma_unencrypted(struct device *dev) -{ - /* - * For SEV, all DMA must be to unencrypted addresses. - */ - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - return true; - - /* - * For SME, all DMA must be to unencrypted addresses if the - * device does not support DMA to addresses that include the - * encryption mask. 
- */ - if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { - u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); - u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, - dev->bus_dma_limit); - - if (dma_dev_mask <= dma_enc_mask) - return true; - } - - return false; -} - -void __init mem_encrypt_free_decrypted_mem(void) -{ - unsigned long vaddr, vaddr_end, npages; - int r; - - vaddr = (unsigned long)__start_bss_decrypted_unused; - vaddr_end = (unsigned long)__end_bss_decrypted; - npages = (vaddr_end - vaddr) >> PAGE_SHIFT; - - /* - * The unused memory range was mapped decrypted, change the encryption - * attribute from decrypted to encrypted before freeing it. - */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { - r = set_memory_encrypted(vaddr, npages); - if (r) { - pr_warn("failed to free unused decrypted pages\n"); - return; - } - } - - free_init_pages("unused decrypted", vaddr, vaddr_end); -} - -static void print_mem_encrypt_feature_info(void) -{ - pr_info("AMD Memory Encryption Features active:"); - - /* Secure Memory Encryption */ - if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { - /* - * SME is mutually exclusive with any of the SEV - * features below. 
- */ - pr_cont(" SME\n"); - return; - } - - /* Secure Encrypted Virtualization */ - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - pr_cont(" SEV"); - - /* Encrypted Register State */ - if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) - pr_cont(" SEV-ES"); - - pr_cont("\n"); -} - -/* Architecture __weak replacement functions */ -void __init mem_encrypt_init(void) -{ - if (!sme_me_mask) - return; - - /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ - swiotlb_update_mem_attributes(); - - print_mem_encrypt_feature_info(); -} - -int arch_has_restricted_virtio_memory_access(void) -{ - return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT); -} -EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c new file mode 100644 index 000000000000..b520021a7e7b --- /dev/null +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -0,0 +1,507 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Memory Encryption Support + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mm_internal.h" + +/* + * Since SME related variables are set early in the boot process they must + * reside in the .data section so as not to be zeroed out when the .bss + * section is later cleared. + */ +u64 sme_me_mask __section(".data") = 0; +u64 sev_status __section(".data") = 0; +u64 sev_check_data __section(".data") = 0; +EXPORT_SYMBOL(sme_me_mask); + +/* Buffer used for early in-place encryption by BSP, no locking needed */ +static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE); + +/* + * This routine does not change the underlying encryption setting of the + * page(s) that map this memory. 
It assumes that eventually the memory is + * meant to be accessed as either encrypted or decrypted but the contents + * are currently not in the desired state. + * + * This routine follows the steps outlined in the AMD64 Architecture + * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place. + */ +static void __init __sme_early_enc_dec(resource_size_t paddr, + unsigned long size, bool enc) +{ + void *src, *dst; + size_t len; + + if (!sme_me_mask) + return; + + wbinvd(); + + /* + * There are limited number of early mapping slots, so map (at most) + * one page at time. + */ + while (size) { + len = min_t(size_t, sizeof(sme_early_buffer), size); + + /* + * Create mappings for the current and desired format of + * the memory. Use a write-protected mapping for the source. + */ + src = enc ? early_memremap_decrypted_wp(paddr, len) : + early_memremap_encrypted_wp(paddr, len); + + dst = enc ? early_memremap_encrypted(paddr, len) : + early_memremap_decrypted(paddr, len); + + /* + * If a mapping can't be obtained to perform the operation, + * then eventual access of that area in the desired mode + * will cause a crash. + */ + BUG_ON(!src || !dst); + + /* + * Use a temporary buffer, of cache-line multiple size, to + * avoid data corruption as documented in the APM. 
+ */ + memcpy(sme_early_buffer, src, len); + memcpy(dst, sme_early_buffer, len); + + early_memunmap(dst, len); + early_memunmap(src, len); + + paddr += len; + size -= len; + } +} + +void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) +{ + __sme_early_enc_dec(paddr, size, true); +} + +void __init sme_early_decrypt(resource_size_t paddr, unsigned long size) +{ + __sme_early_enc_dec(paddr, size, false); +} + +static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size, + bool map) +{ + unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET; + pmdval_t pmd_flags, pmd; + + /* Use early_pmd_flags but remove the encryption mask */ + pmd_flags = __sme_clr(early_pmd_flags); + + do { + pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0; + __early_make_pgtable((unsigned long)vaddr, pmd); + + vaddr += PMD_SIZE; + paddr += PMD_SIZE; + size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE; + } while (size); + + flush_tlb_local(); +} + +void __init sme_unmap_bootdata(char *real_mode_data) +{ + struct boot_params *boot_data; + unsigned long cmdline_paddr; + + if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return; + + /* Get the command line address before unmapping the real_mode_data */ + boot_data = (struct boot_params *)real_mode_data; + cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32); + + __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false); + + if (!cmdline_paddr) + return; + + __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false); +} + +void __init sme_map_bootdata(char *real_mode_data) +{ + struct boot_params *boot_data; + unsigned long cmdline_paddr; + + if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return; + + __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true); + + /* Get the command line address after mapping the real_mode_data */ + boot_data = (struct boot_params *)real_mode_data; + cmdline_paddr = boot_data->hdr.cmd_line_ptr | 
((u64)boot_data->ext_cmd_line_ptr << 32); + + if (!cmdline_paddr) + return; + + __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); +} + +void __init sme_early_init(void) +{ + unsigned int i; + + if (!sme_me_mask) + return; + + early_pmd_flags = __sme_set(early_pmd_flags); + + __supported_pte_mask = __sme_set(__supported_pte_mask); + + /* Update the protection map with memory encryption mask */ + for (i = 0; i < ARRAY_SIZE(protection_map); i++) + protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + swiotlb_force = SWIOTLB_FORCE; +} + +void __init sev_setup_arch(void) +{ + phys_addr_t total_mem = memblock_phys_mem_size(); + unsigned long size; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * For SEV, all DMA has to occur via shared/unencrypted pages. + * SEV uses SWIOTLB to make this happen without changing device + * drivers. However, depending on the workload being run, the + * default 64MB of SWIOTLB may not be enough and SWIOTLB may + * run out of buffers for DMA, resulting in I/O errors and/or + * performance degradation especially with high I/O workloads. + * + * Adjust the default size of SWIOTLB for SEV guests using + * a percentage of guest memory for SWIOTLB buffers. + * Also, as the SWIOTLB bounce buffer memory is allocated + * from low memory, ensure that the adjusted size is within + * the limits of low available memory. 
+ * + * The percentage of guest memory used here for SWIOTLB buffers + * is more of an approximation of the static adjustment which + * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6% + */ + size = total_mem * 6 / 100; + size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G); + swiotlb_adjust_size(size); +} + +static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) +{ + unsigned long pfn = 0; + pgprot_t prot; + + switch (level) { + case PG_LEVEL_4K: + pfn = pte_pfn(*kpte); + prot = pte_pgprot(*kpte); + break; + case PG_LEVEL_2M: + pfn = pmd_pfn(*(pmd_t *)kpte); + prot = pmd_pgprot(*(pmd_t *)kpte); + break; + case PG_LEVEL_1G: + pfn = pud_pfn(*(pud_t *)kpte); + prot = pud_pgprot(*(pud_t *)kpte); + break; + default: + WARN_ONCE(1, "Invalid level for kpte\n"); + return 0; + } + + if (ret_prot) + *ret_prot = prot; + + return pfn; +} + +void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +{ +#ifdef CONFIG_PARAVIRT + unsigned long sz = npages << PAGE_SHIFT; + unsigned long vaddr_end = vaddr + sz; + + while (vaddr < vaddr_end) { + int psize, pmask, level; + unsigned long pfn; + pte_t *kpte; + + kpte = lookup_address(vaddr, &level); + if (!kpte || pte_none(*kpte)) { + WARN_ONCE(1, "kpte lookup for vaddr\n"); + return; + } + + pfn = pg_level_to_pfn(level, kpte, NULL); + if (!pfn) + continue; + + psize = page_level_size(level); + pmask = page_level_mask(level); + + notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc); + + vaddr = (vaddr & pmask) + psize; + } +#endif +} + +static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) +{ + pgprot_t old_prot, new_prot; + unsigned long pfn, pa, size; + pte_t new_pte; + + pfn = pg_level_to_pfn(level, kpte, &old_prot); + if (!pfn) + return; + + new_prot = old_prot; + if (enc) + pgprot_val(new_prot) |= _PAGE_ENC; + else + pgprot_val(new_prot) &= ~_PAGE_ENC; + + /* If prot is same then do nothing. 
*/ + if (pgprot_val(old_prot) == pgprot_val(new_prot)) + return; + + pa = pfn << PAGE_SHIFT; + size = page_level_size(level); + + /* + * We are going to perform in-place en-/decryption and change the + * physical page attribute from C=1 to C=0 or vice versa. Flush the + * caches to ensure that data gets accessed with the correct C-bit. + */ + clflush_cache_range(__va(pa), size); + + /* Encrypt/decrypt the contents in-place */ + if (enc) + sme_early_encrypt(pa, size); + else + sme_early_decrypt(pa, size); + + /* Change the page encryption mask. */ + new_pte = pfn_pte(pfn, new_prot); + set_pte_atomic(kpte, new_pte); +} + +static int __init early_set_memory_enc_dec(unsigned long vaddr, + unsigned long size, bool enc) +{ + unsigned long vaddr_end, vaddr_next, start; + unsigned long psize, pmask; + int split_page_size_mask; + int level, ret; + pte_t *kpte; + + start = vaddr; + vaddr_next = vaddr; + vaddr_end = vaddr + size; + + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + kpte = lookup_address(vaddr, &level); + if (!kpte || pte_none(*kpte)) { + ret = 1; + goto out; + } + + if (level == PG_LEVEL_4K) { + __set_clr_pte_enc(kpte, level, enc); + vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE; + continue; + } + + psize = page_level_size(level); + pmask = page_level_mask(level); + + /* + * Check whether we can change the large page in one go. + * We request a split when the address is not aligned and + * the number of pages to set/clear encryption bit is smaller + * than the number of pages in the large page. + */ + if (vaddr == (vaddr & pmask) && + ((vaddr_end - vaddr) >= psize)) { + __set_clr_pte_enc(kpte, level, enc); + vaddr_next = (vaddr & pmask) + psize; + continue; + } + + /* + * The virtual address is part of a larger page, create the next + * level page table mapping (4K or 2M). If it is part of a 2M + * page then we request a split of the large page into 4K + * chunks. A 1GB large page is split into 2M pages, resp. 
+ */ + if (level == PG_LEVEL_2M) + split_page_size_mask = 0; + else + split_page_size_mask = 1 << PG_LEVEL_2M; + + /* + * kernel_physical_mapping_change() does not flush the TLBs, so + * a TLB flush is required after we exit from the for loop. + */ + kernel_physical_mapping_change(__pa(vaddr & pmask), + __pa((vaddr_end & pmask) + psize), + split_page_size_mask); + } + + ret = 0; + + notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); +out: + __flush_tlb_all(); + return ret; +} + +int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) +{ + return early_set_memory_enc_dec(vaddr, size, false); +} + +int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) +{ + return early_set_memory_enc_dec(vaddr, size, true); +} + +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) +{ + notify_range_enc_status_changed(vaddr, npages, enc); +} + +/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ +bool force_dma_unencrypted(struct device *dev) +{ + /* + * For SEV, all DMA must be to unencrypted addresses. + */ + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return true; + + /* + * For SME, all DMA must be to unencrypted addresses if the + * device does not support DMA to addresses that include the + * encryption mask. 
+ */ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); + u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, + dev->bus_dma_limit); + + if (dma_dev_mask <= dma_enc_mask) + return true; + } + + return false; +} + +void __init mem_encrypt_free_decrypted_mem(void) +{ + unsigned long vaddr, vaddr_end, npages; + int r; + + vaddr = (unsigned long)__start_bss_decrypted_unused; + vaddr_end = (unsigned long)__end_bss_decrypted; + npages = (vaddr_end - vaddr) >> PAGE_SHIFT; + + /* + * The unused memory range was mapped decrypted, change the encryption + * attribute from decrypted to encrypted before freeing it. + */ + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { + r = set_memory_encrypted(vaddr, npages); + if (r) { + pr_warn("failed to free unused decrypted pages\n"); + return; + } + } + + free_init_pages("unused decrypted", vaddr, vaddr_end); +} + +static void print_mem_encrypt_feature_info(void) +{ + pr_info("AMD Memory Encryption Features active:"); + + /* Secure Memory Encryption */ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + /* + * SME is mutually exclusive with any of the SEV + * features below. 
+ */ + pr_cont(" SME\n"); + return; + } + + /* Secure Encrypted Virtualization */ + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + pr_cont(" SEV"); + + /* Encrypted Register State */ + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + pr_cont(" SEV-ES"); + + pr_cont("\n"); +} + +/* Architecture __weak replacement functions */ +void __init mem_encrypt_init(void) +{ + if (!sme_me_mask) + return; + + /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ + swiotlb_update_mem_attributes(); + + print_mem_encrypt_feature_info(); +} + +int arch_has_restricted_virtio_memory_access(void) +{ + return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT); +} +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); -- cgit v1.2.3-70-g09d2 From 20f07a044a76aebaaa0603038857229b5c460d69 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 6 Dec 2021 16:55:05 +0300 Subject: x86/sev: Move common memory encryption code to mem_encrypt.c SEV and TDX both protect guest memory from host accesses. They both use guest physical address bits to communicate to the hardware which pages receive protection or not. SEV and TDX both assume that all I/O (real devices and virtio) must be performed to pages *without* protection. To add this support, AMD SEV code forces force_dma_unencrypted() to decrypt DMA pages when DMA pages were allocated for I/O. It also uses swiotlb_update_mem_attributes() to update decryption bits in SWIOTLB DMA buffers. Since TDX also uses a similar memory sharing design, all the above mentioned changes can be reused. So move force_dma_unencrypted(), SWIOTLB update code and virtio changes out of mem_encrypt_amd.c to mem_encrypt.c. Introduce a new config option X86_MEM_ENCRYPT that can be selected by platforms which use x86 memory encryption features (needed in both AMD SEV and Intel TDX guest platforms). Since the code is moved from mem_encrypt_amd.c, inherit the same make flags. 
This is preparation for enabling TDX memory encryption support and it has no functional changes. Co-developed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Reviewed-by: Andi Kleen Reviewed-by: Tony Luck Reviewed-by: Tom Lendacky Tested-by: Tom Lendacky Link: https://lore.kernel.org/r/20211206135505.75045-4-kirill.shutemov@linux.intel.com --- arch/x86/Kconfig | 10 ++++-- arch/x86/mm/Makefile | 5 +++ arch/x86/mm/mem_encrypt.c | 84 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/mem_encrypt_amd.c | 69 ----------------------------------- 4 files changed, 96 insertions(+), 72 deletions(-) create mode 100644 arch/x86/mm/mem_encrypt.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 95dd1ee01546..793e9b42ace0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1523,16 +1523,20 @@ config X86_CPA_STATISTICS helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. +config X86_MEM_ENCRYPT + select ARCH_HAS_FORCE_DMA_UNENCRYPTED + select DYNAMIC_PHYSICAL_MASK + select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS + def_bool n + config AMD_MEM_ENCRYPT bool "AMD Secure Memory Encryption (SME) support" depends on X86_64 && CPU_SUP_AMD select DMA_COHERENT_POOL - select DYNAMIC_PHYSICAL_MASK select ARCH_USE_MEMREMAP_PROT - select ARCH_HAS_FORCE_DMA_UNENCRYPTED select INSTRUCTION_DECODER - select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS select ARCH_HAS_CC_PLATFORM + select X86_MEM_ENCRYPT help Say yes to enable support for the encryption of system memory. 
This requires an AMD processor that supports Secure Memory diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c9c480641153..fe3d3061fc11 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,9 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 # Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c KCOV_INSTRUMENT_tlb.o := n +KCOV_INSTRUMENT_mem_encrypt.o := n KCOV_INSTRUMENT_mem_encrypt_amd.o := n KCOV_INSTRUMENT_mem_encrypt_identity.o := n +KASAN_SANITIZE_mem_encrypt.o := n KASAN_SANITIZE_mem_encrypt_amd.o := n KASAN_SANITIZE_mem_encrypt_identity.o := n @@ -12,6 +14,7 @@ KASAN_SANITIZE_mem_encrypt_identity.o := n KCSAN_SANITIZE := n ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_mem_encrypt.o = -pg CFLAGS_REMOVE_mem_encrypt_amd.o = -pg CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif @@ -52,6 +55,8 @@ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o +obj-$(CONFIG_X86_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o + obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c new file mode 100644 index 000000000000..50d209939c66 --- /dev/null +++ b/arch/x86/mm/mem_encrypt.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Memory Encryption Support Common Code + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#include +#include +#include +#include +#include +#include + +/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ +bool force_dma_unencrypted(struct device *dev) +{ + /* + * For SEV, all DMA must be to unencrypted addresses. 
+ */ + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return true; + + /* + * For SME, all DMA must be to unencrypted addresses if the + * device does not support DMA to addresses that include the + * encryption mask. + */ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); + u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, + dev->bus_dma_limit); + + if (dma_dev_mask <= dma_enc_mask) + return true; + } + + return false; +} + +static void print_mem_encrypt_feature_info(void) +{ + pr_info("AMD Memory Encryption Features active:"); + + /* Secure Memory Encryption */ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { + /* + * SME is mutually exclusive with any of the SEV + * features below. + */ + pr_cont(" SME\n"); + return; + } + + /* Secure Encrypted Virtualization */ + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + pr_cont(" SEV"); + + /* Encrypted Register State */ + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + pr_cont(" SEV-ES"); + + pr_cont("\n"); +} + +/* Architecture __weak replacement functions */ +void __init mem_encrypt_init(void) +{ + if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + return; + + /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ + swiotlb_update_mem_attributes(); + + print_mem_encrypt_feature_info(); +} + +int arch_has_restricted_virtio_memory_access(void) +{ + return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT); +} +EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index b520021a7e7b..2b2d018ea345 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -413,32 +413,6 @@ void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, boo notify_range_enc_status_changed(vaddr, npages, enc); } -/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ -bool force_dma_unencrypted(struct device *dev) -{ - /* - * For SEV, all 
DMA must be to unencrypted addresses. - */ - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - return true; - - /* - * For SME, all DMA must be to unencrypted addresses if the - * device does not support DMA to addresses that include the - * encryption mask. - */ - if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { - u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); - u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, - dev->bus_dma_limit); - - if (dma_dev_mask <= dma_enc_mask) - return true; - } - - return false; -} - void __init mem_encrypt_free_decrypted_mem(void) { unsigned long vaddr, vaddr_end, npages; @@ -462,46 +436,3 @@ void __init mem_encrypt_free_decrypted_mem(void) free_init_pages("unused decrypted", vaddr, vaddr_end); } - -static void print_mem_encrypt_feature_info(void) -{ - pr_info("AMD Memory Encryption Features active:"); - - /* Secure Memory Encryption */ - if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { - /* - * SME is mutually exclusive with any of the SEV - * features below. - */ - pr_cont(" SME\n"); - return; - } - - /* Secure Encrypted Virtualization */ - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - pr_cont(" SEV"); - - /* Encrypted Register State */ - if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) - pr_cont(" SEV-ES"); - - pr_cont("\n"); -} - -/* Architecture __weak replacement functions */ -void __init mem_encrypt_init(void) -{ - if (!sme_me_mask) - return; - - /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ - swiotlb_update_mem_attributes(); - - print_mem_encrypt_feature_info(); -} - -int arch_has_restricted_virtio_memory_access(void) -{ - return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT); -} -EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access); -- cgit v1.2.3-70-g09d2