From 6ba0efa46047936afa81460489cfd24bc95dd863 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:13 +0200 Subject: x86/boot/compressed/64: Disable red-zone usage The x86-64 ABI defines a red-zone on the stack: The 128-byte area beyond the location pointed to by %rsp is considered to be reserved and shall not be modified by signal or interrupt handlers. Therefore, functions may use this area for temporary data that is not needed across function calls. In particular, leaf functions may use this area for their entire stack frame, rather than adjusting the stack pointer in the prologue and epilogue. This area is known as the red zone. This is not compatible with exception handling, because the IRET frame written by the hardware at the stack pointer and the functions to handle the exception will overwrite the temporary variables of the interrupted function, causing undefined behavior. So disable red-zones for the pre-decompression boot code. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200907131613.12703-13-joro@8bytes.org --- arch/x86/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3962f592633d..5343079af973 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -32,7 +32,7 @@ KBUILD_CFLAGS := -m$(BITS) -O2 KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC) KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 -cflags-$(CONFIG_X86_64) := -mcmodel=small +cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += -ffreestanding -- cgit v1.2.3-70-g09d2 From 64e682638eb51070ba6044535b250aad43c5564e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:14 +0200 Subject: x86/boot/compressed/64: Add IDT Infrastructure Add code needed to set up an IDT in the early pre-decompression boot code. The IDT is loaded first in startup_64, which is after EfiExitBootServices() has been called, and later reloaded when the kernel image has been relocated to the end of the decompression area. This allows setting up different IDT handlers before and after the relocation. 
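As a side note (editorial illustration, not part of the patch): the set_idt_entry() helper added below works because a 64-bit IDT gate stores the handler address split across three offset fields, which the CPU reassembles when the exception is delivered. A minimal stand-alone C sketch of that split and round-trip, using a made-up handler address and a simplified struct (the real layout is gate_desc in asm/desc_defs.h):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for gate_desc; field names chosen for illustration. */
struct example_gate {
	uint16_t offset_low;	/* handler address bits  0..15 */
	uint16_t segment;	/* code segment selector, __KERNEL_CS in the patch */
	uint16_t flags;		/* gate type, DPL and present bit */
	uint16_t offset_middle;	/* handler address bits 16..31 */
	uint32_t offset_high;	/* handler address bits 32..63 */
	uint32_t reserved;
};

int main(void)
{
	uint64_t handler = 0xffffffff81234567ULL;	/* made-up address */
	struct example_gate g = {
		.offset_low    = (uint16_t)(handler & 0xffff),
		.offset_middle = (uint16_t)((handler >> 16) & 0xffff),
		.offset_high   = (uint32_t)(handler >> 32),
	};
	uint64_t reassembled = (uint64_t)g.offset_low |
			       ((uint64_t)g.offset_middle << 16) |
			       ((uint64_t)g.offset_high << 32);

	printf("round-trip matches: %s\n", reassembled == handler ? "yes" : "no");
	return 0;
}
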
Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-14-joro@8bytes.org --- arch/x86/boot/compressed/Makefile | 1 + arch/x86/boot/compressed/head_64.S | 25 ++++++++++- arch/x86/boot/compressed/idt_64.c | 44 +++++++++++++++++++ arch/x86/boot/compressed/idt_handlers_64.S | 70 ++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 5 +++ arch/x86/include/asm/desc_defs.h | 3 ++ 6 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 arch/x86/boot/compressed/idt_64.c create mode 100644 arch/x86/boot/compressed/idt_handlers_64.S (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 5343079af973..c661dc57674e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -85,6 +85,7 @@ vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o + vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o endif diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 97d37f0a34f5..c634ed8636da 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -33,6 +33,7 @@ #include #include #include +#include #include "pgtable.h" /* @@ -410,6 +411,10 @@ SYM_CODE_START(startup_64) .Lon_kernel_cs: + pushq %rsi + call load_stage1_idt + popq %rsi + /* * paging_prepare() sets up the trampoline and checks if we need to * enable 5-level paging. @@ -537,6 +542,13 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) shrq $3, %rcx rep stosq +/* + * Load stage2 IDT + */ + pushq %rsi + call load_stage2_idt + popq %rsi + /* * Do the extraction, and jump to the new kernel.. 
*/ @@ -690,10 +702,21 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x0000000000000000 /* TS continued */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) +SYM_DATA_START(boot_idt_desc) + .word boot_idt_end - boot_idt - 1 + .quad 0 +SYM_DATA_END(boot_idt_desc) + .balign 8 +SYM_DATA_START(boot_idt) + .rept BOOT_IDT_ENTRIES + .quad 0 + .quad 0 + .endr +SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) + #ifdef CONFIG_EFI_STUB SYM_DATA(image_offset, .long 0) #endif - #ifdef CONFIG_EFI_MIXED SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c new file mode 100644 index 000000000000..082cd6bca033 --- /dev/null +++ b/arch/x86/boot/compressed/idt_64.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include "misc.h" + +static void set_idt_entry(int vector, void (*handler)(void)) +{ + unsigned long address = (unsigned long)handler; + gate_desc entry; + + memset(&entry, 0, sizeof(entry)); + + entry.offset_low = (u16)(address & 0xffff); + entry.segment = __KERNEL_CS; + entry.bits.type = GATE_TRAP; + entry.bits.p = 1; + entry.offset_middle = (u16)((address >> 16) & 0xffff); + entry.offset_high = (u32)(address >> 32); + + memcpy(&boot_idt[vector], &entry, sizeof(entry)); +} + +/* Have this here so we don't need to include */ +static void load_boot_idt(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +/* Setup IDT before kernel jumping to .Lrelocated */ +void load_stage1_idt(void) +{ + boot_idt_desc.address = (unsigned long)boot_idt; + + load_boot_idt(&boot_idt_desc); +} + +/* Setup IDT after kernel jumping to .Lrelocated */ +void load_stage2_idt(void) +{ + boot_idt_desc.address = (unsigned long)boot_idt; + + load_boot_idt(&boot_idt_desc); +} diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S new file mode 100644 index 000000000000..36dee2f40a8b --- /dev/null +++ b/arch/x86/boot/compressed/idt_handlers_64.S @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Early IDT handler entry points + * + * Copyright (C) 2019 SUSE + * + * Author: Joerg Roedel + */ + +#include + +/* For ORIG_RAX */ +#include "../../entry/calling.h" + +.macro EXCEPTION_HANDLER name function error_code=0 +SYM_FUNC_START(\name) + + /* Build pt_regs */ + .if \error_code == 0 + pushq $0 + .endif + + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %rax + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + /* Call handler with pt_regs */ + movq %rsp, %rdi + /* Error code is second parameter */ + movq ORIG_RAX(%rsp), %rsi + call \function + + /* Restore regs */ + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + popq %rdi + + /* Remove error code and return */ + addq $8, %rsp + + iretq +SYM_FUNC_END(\name) + .endm + + .text + .code64 diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 3efce27ba35c..8feb5f6f329e 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,6 +23,7 @@ #include #include #include +#include #define BOOT_CTYPE_H #include @@ -133,4 +134,8 @@ int count_immovable_mem_regions(void); static inline int count_immovable_mem_regions(void) { return 0; } #endif +/* idt_64.c */ +extern gate_desc boot_idt[BOOT_IDT_ENTRIES]; +extern 
struct desc_ptr boot_idt_desc; + #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index a91f3b6e4f2a..5621fb3f2d1a 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -109,6 +109,9 @@ struct desc_ptr { #endif /* !__ASSEMBLY__ */ +/* Boot IDT definitions */ +#define BOOT_IDT_ENTRIES 32 + /* Access rights as returned by LAR */ #define AR_TYPE_RODATA (0 * (1 << 9)) #define AR_TYPE_RWDATA (1 * (1 << 9)) -- cgit v1.2.3-70-g09d2 From 5f2bb01682b7b067783207994c7b8a3dbeb1cd83 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:15 +0200 Subject: x86/boot/compressed/64: Rename kaslr_64.c to ident_map_64.c The file contains only code related to identity-mapped page tables. Rename the file and compile it always in. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200907131613.12703-15-joro@8bytes.org --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/ident_map_64.c | 162 ++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/kaslr.c | 9 -- arch/x86/boot/compressed/kaslr_64.c | 153 ------------------------------ arch/x86/boot/compressed/misc.h | 8 ++ 5 files changed, 171 insertions(+), 163 deletions(-) create mode 100644 arch/x86/boot/compressed/ident_map_64.c delete mode 100644 arch/x86/boot/compressed/kaslr_64.c (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index c661dc57674e..e7f3eba99ea2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -84,7 +84,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 - vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o + vmlinux-objs-y += $(obj)/ident_map_64.o vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c new file mode 100644 index 000000000000..d9932a133ac9 --- /dev/null +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This code is used on x86_64 to create page table identity mappings on + * demand by building up a new set of page tables (or appending to the + * existing ones), and then switching over to them when ready. + * + * Copyright (C) 2015-2016 Yinghai Lu + * Copyright (C) 2016 Kees Cook + */ + +/* + * Since we're dealing with identity mappings, physical and virtual + * addresses are the same, so override these defines which are ultimately + * used by the headers in misc.h. + */ +#define __pa(x) ((unsigned long)(x)) +#define __va(x) ((void *)((unsigned long)(x))) + +/* No PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_PAGE_TABLE_ISOLATION + +#include "misc.h" + +/* These actually do the work of building the kernel identity maps. 
*/ +#include +#include +/* Use the static base for this part of the boot process */ +#undef __PAGE_OFFSET +#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#include "../../mm/ident_map.c" + +#ifdef CONFIG_X86_5LEVEL +unsigned int __pgtable_l5_enabled; +unsigned int pgdir_shift = 39; +unsigned int ptrs_per_p4d = 1; +#endif + +/* Used by PAGE_KERN* macros: */ +pteval_t __default_kernel_pte_mask __read_mostly = ~0; + +/* Used to track our page table allocation area. */ +struct alloc_pgt_data { + unsigned char *pgt_buf; + unsigned long pgt_buf_size; + unsigned long pgt_buf_offset; +}; + +/* + * Allocates space for a page table entry, using struct alloc_pgt_data + * above. Besides the local callers, this is used as the allocation + * callback in mapping_info below. + */ +static void *alloc_pgt_page(void *context) +{ + struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context; + unsigned char *entry; + + /* Validate there is space available for a new page. */ + if (pages->pgt_buf_offset >= pages->pgt_buf_size) { + debug_putstr("out of pgt_buf in " __FILE__ "!?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + return NULL; + } + + entry = pages->pgt_buf + pages->pgt_buf_offset; + pages->pgt_buf_offset += PAGE_SIZE; + + return entry; +} + +/* Used to track our allocated page tables. */ +static struct alloc_pgt_data pgt_data; + +/* The top level page table entry pointer. */ +static unsigned long top_level_pgt; + +phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; + +/* + * Mapping information structure passed to kernel_ident_mapping_init(). + * Due to relocation, pointers must be assigned at run time not build time. + */ +static struct x86_mapping_info mapping_info; + +/* Locates and clears a region for a new top level page table. */ +void initialize_identity_maps(void) +{ + /* If running as an SEV guest, the encryption mask is required. */ + set_sev_encryption_mask(); + + /* Exclude the encryption mask from __PHYSICAL_MASK */ + physical_mask &= ~sme_me_mask; + + /* Init mapping_info with run-time function/buffer pointers. */ + mapping_info.alloc_pgt_page = alloc_pgt_page; + mapping_info.context = &pgt_data; + mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask; + mapping_info.kernpg_flag = _KERNPG_TABLE; + + /* + * It should be impossible for this not to already be true, + * but since calling this a second time would rewind the other + * counters, let's just make sure this is reset too. + */ + pgt_data.pgt_buf_offset = 0; + + /* + * If we came here via startup_32(), cr3 will be _pgtable already + * and we must append to the existing area instead of entirely + * overwriting it. + * + * With 5-level paging, we use '_pgtable' to allocate the p4d page table, + * the top-level page table is allocated separately. + * + * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level + * cases. On 4-level paging it's equal to 'top_level_pgt'. 
+ */ + top_level_pgt = read_cr3_pa(); + if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) { + debug_putstr("booted via startup_32()\n"); + pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + } else { + debug_putstr("booted via startup_64()\n"); + pgt_data.pgt_buf = _pgtable; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data); + } +} + +/* + * Adds the specified range to what will become the new identity mappings. + * Once all ranges have been added, the new mapping is activated by calling + * finalize_identity_maps() below. + */ +void add_identity_map(unsigned long start, unsigned long size) +{ + unsigned long end = start + size; + + /* Align boundary to 2M. */ + start = round_down(start, PMD_SIZE); + end = round_up(end, PMD_SIZE); + if (start >= end) + return; + + /* Build the mapping. */ + kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, + start, end); +} + +/* + * This switches the page tables to the new level4 that has been built + * via calls to add_identity_map() above. If booted via startup_32(), + * this is effectively a no-op. + */ +void finalize_identity_maps(void) +{ + write_cr3(top_level_pgt); +} diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 877970d76249..e27de98ed038 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -40,17 +40,8 @@ #include /* For COMMAND_LINE_SIZE */ #undef _SETUP -#ifdef CONFIG_X86_5LEVEL -unsigned int __pgtable_l5_enabled; -unsigned int pgdir_shift __ro_after_init = 39; -unsigned int ptrs_per_p4d __ro_after_init = 1; -#endif - extern unsigned long get_cmd_line_ptr(void); -/* Used by PAGE_KERN* macros: */ -pteval_t __default_kernel_pte_mask __read_mostly = ~0; - /* Simplified build-specific string for starting entropy. */ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c deleted file mode 100644 index f9c5c13d979b..000000000000 --- a/arch/x86/boot/compressed/kaslr_64.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This code is used on x86_64 to create page table identity mappings on - * demand by building up a new set of page tables (or appending to the - * existing ones), and then switching over to them when ready. - * - * Copyright (C) 2015-2016 Yinghai Lu - * Copyright (C) 2016 Kees Cook - */ - -/* - * Since we're dealing with identity mappings, physical and virtual - * addresses are the same, so override these defines which are ultimately - * used by the headers in misc.h. - */ -#define __pa(x) ((unsigned long)(x)) -#define __va(x) ((void *)((unsigned long)(x))) - -/* No PAGE_TABLE_ISOLATION support needed either: */ -#undef CONFIG_PAGE_TABLE_ISOLATION - -#include "misc.h" - -/* These actually do the work of building the kernel identity maps. */ -#include -#include -/* Use the static base for this part of the boot process */ -#undef __PAGE_OFFSET -#define __PAGE_OFFSET __PAGE_OFFSET_BASE -#include "../../mm/ident_map.c" - -/* Used to track our page table allocation area. 
*/ -struct alloc_pgt_data { - unsigned char *pgt_buf; - unsigned long pgt_buf_size; - unsigned long pgt_buf_offset; -}; - -/* - * Allocates space for a page table entry, using struct alloc_pgt_data - * above. Besides the local callers, this is used as the allocation - * callback in mapping_info below. - */ -static void *alloc_pgt_page(void *context) -{ - struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context; - unsigned char *entry; - - /* Validate there is space available for a new page. */ - if (pages->pgt_buf_offset >= pages->pgt_buf_size) { - debug_putstr("out of pgt_buf in " __FILE__ "!?\n"); - debug_putaddr(pages->pgt_buf_offset); - debug_putaddr(pages->pgt_buf_size); - return NULL; - } - - entry = pages->pgt_buf + pages->pgt_buf_offset; - pages->pgt_buf_offset += PAGE_SIZE; - - return entry; -} - -/* Used to track our allocated page tables. */ -static struct alloc_pgt_data pgt_data; - -/* The top level page table entry pointer. */ -static unsigned long top_level_pgt; - -phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; - -/* - * Mapping information structure passed to kernel_ident_mapping_init(). - * Due to relocation, pointers must be assigned at run time not build time. - */ -static struct x86_mapping_info mapping_info; - -/* Locates and clears a region for a new top level page table. */ -void initialize_identity_maps(void) -{ - /* If running as an SEV guest, the encryption mask is required. */ - set_sev_encryption_mask(); - - /* Exclude the encryption mask from __PHYSICAL_MASK */ - physical_mask &= ~sme_me_mask; - - /* Init mapping_info with run-time function/buffer pointers. */ - mapping_info.alloc_pgt_page = alloc_pgt_page; - mapping_info.context = &pgt_data; - mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask; - mapping_info.kernpg_flag = _KERNPG_TABLE; - - /* - * It should be impossible for this not to already be true, - * but since calling this a second time would rewind the other - * counters, let's just make sure this is reset too. - */ - pgt_data.pgt_buf_offset = 0; - - /* - * If we came here via startup_32(), cr3 will be _pgtable already - * and we must append to the existing area instead of entirely - * overwriting it. - * - * With 5-level paging, we use '_pgtable' to allocate the p4d page table, - * the top-level page table is allocated separately. - * - * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level - * cases. On 4-level paging it's equal to 'top_level_pgt'. - */ - top_level_pgt = read_cr3_pa(); - if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) { - debug_putstr("booted via startup_32()\n"); - pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; - pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; - memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); - } else { - debug_putstr("booted via startup_64()\n"); - pgt_data.pgt_buf = _pgtable; - pgt_data.pgt_buf_size = BOOT_PGT_SIZE; - memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); - top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data); - } -} - -/* - * Adds the specified range to what will become the new identity mappings. - * Once all ranges have been added, the new mapping is activated by calling - * finalize_identity_maps() below. - */ -void add_identity_map(unsigned long start, unsigned long size) -{ - unsigned long end = start + size; - - /* Align boundary to 2M. */ - start = round_down(start, PMD_SIZE); - end = round_up(end, PMD_SIZE); - if (start >= end) - return; - - /* Build the mapping. 
*/ - kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, - start, end); -} - -/* - * This switches the page tables to the new level4 that has been built - * via calls to add_identity_map() above. If booted via startup_32(), - * this is effectively a no-op. - */ -void finalize_identity_maps(void) -{ - write_cr3(top_level_pgt); -} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 8feb5f6f329e..98b7a1df9c59 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -134,6 +134,14 @@ int count_immovable_mem_regions(void); static inline int count_immovable_mem_regions(void) { return 0; } #endif +/* ident_map_64.c */ +#ifdef CONFIG_X86_5LEVEL +extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; +#endif + +/* Used by PAGE_KERN* macros: */ +extern pteval_t __default_kernel_pte_mask; + /* idt_64.c */ extern gate_desc boot_idt[BOOT_IDT_ENTRIES]; extern struct desc_ptr boot_idt_desc; -- cgit v1.2.3-70-g09d2 From 8b0d3b3b41ab6f14f1ce6d4a6b1c5f60b825123f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:16 +0200 Subject: x86/boot/compressed/64: Add page-fault handler Install a page-fault handler to add an identity mapping to addresses not yet mapped. Also do some checking whether the error code is sane. This makes non SEV-ES machines use the exception handling infrastructure in the pre-decompressions boot code too, making it less likely to break in the future. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200907131613.12703-16-joro@8bytes.org --- arch/x86/boot/compressed/ident_map_64.c | 39 ++++++++++++++++++++++++++++++ arch/x86/boot/compressed/idt_64.c | 2 ++ arch/x86/boot/compressed/idt_handlers_64.S | 2 ++ arch/x86/boot/compressed/misc.h | 6 +++++ 4 files changed, 49 insertions(+) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index d9932a133ac9..e3d980ae9c2b 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -19,10 +19,13 @@ /* No PAGE_TABLE_ISOLATION support needed either: */ #undef CONFIG_PAGE_TABLE_ISOLATION +#include "error.h" #include "misc.h" /* These actually do the work of building the kernel identity maps. */ #include +#include +#include #include /* Use the static base for this part of the boot process */ #undef __PAGE_OFFSET @@ -160,3 +163,39 @@ void finalize_identity_maps(void) { write_cr3(top_level_pgt); } + +static void do_pf_error(const char *msg, unsigned long error_code, + unsigned long address, unsigned long ip) +{ + error_putstr(msg); + + error_putstr("\nError Code: "); + error_puthex(error_code); + error_putstr("\nCR2: 0x"); + error_puthex(address); + error_putstr("\nRIP relative to _head: 0x"); + error_puthex(ip - (unsigned long)_head); + error_putstr("\n"); + + error("Stopping.\n"); +} + +void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + unsigned long address = native_read_cr2(); + + /* + * Check for unexpected error codes. Unexpected are: + * - Faults on present pages + * - User faults + * - Reserved bits set + */ + if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD)) + do_pf_error("Unexpected page-fault:", error_code, address, regs->ip); + + /* + * Error code is sane - now identity map the 2M region around + * the faulting address. 
+ */ + add_identity_map(address & PMD_MASK, PMD_SIZE); +} diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index 082cd6bca033..5f083092a86d 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -40,5 +40,7 @@ void load_stage2_idt(void) { boot_idt_desc.address = (unsigned long)boot_idt; + set_idt_entry(X86_TRAP_PF, boot_page_fault); + load_boot_idt(&boot_idt_desc); } diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S index 36dee2f40a8b..b20e57504a94 100644 --- a/arch/x86/boot/compressed/idt_handlers_64.S +++ b/arch/x86/boot/compressed/idt_handlers_64.S @@ -68,3 +68,5 @@ SYM_FUNC_END(\name) .text .code64 + +EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1 diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 98b7a1df9c59..f0e199174c5f 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -37,6 +37,9 @@ #define memptr unsigned #endif +/* boot/compressed/vmlinux start and end markers */ +extern char _head[], _end[]; + /* misc.c */ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; @@ -146,4 +149,7 @@ extern pteval_t __default_kernel_pte_mask; extern gate_desc boot_idt[BOOT_IDT_ENTRIES]; extern struct desc_ptr boot_idt_desc; +/* IDT Entry Points */ +void boot_page_fault(void); + #endif /* BOOT_COMPRESSED_MISC_H */ -- cgit v1.2.3-70-g09d2 From ca0e22d4f011a56e974fa3a712d76e86a791559d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:17 +0200 Subject: x86/boot/compressed/64: Always switch to own page table When booted through startup_64(), the kernel keeps running on the EFI page table until the KASLR code sets up its own page table. Without KASLR, the pre-decompression boot code never switches off the EFI page table. Change that by unconditionally switching to a kernel-controlled page table after relocation. This makes sure the kernel can make changes to the mapping when necessary, for example map pages unencrypted in SEV and SEV-ES guests. Also, remove the debug_putstr() calls in initialize_identity_maps() because the function now runs before console_init() is called. [ bp: Massage commit message. ] Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200907131613.12703-17-joro@8bytes.org --- arch/x86/boot/compressed/head_64.S | 3 +- arch/x86/boot/compressed/ident_map_64.c | 51 +++++++++++++++++++-------------- arch/x86/boot/compressed/kaslr.c | 3 -- 3 files changed, 32 insertions(+), 25 deletions(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c634ed8636da..fb6c0392306b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -543,10 +543,11 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) rep stosq /* - * Load stage2 IDT + * Load stage2 IDT and switch to our own page-table */ pushq %rsi call load_stage2_idt + call initialize_identity_maps popq %rsi /* diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index e3d980ae9c2b..ecf9353b064d 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -86,9 +86,31 @@ phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; */ static struct x86_mapping_info mapping_info; +/* + * Adds the specified range to what will become the new identity mappings. 
+ * Once all ranges have been added, the new mapping is activated by calling + * finalize_identity_maps() below. + */ +void add_identity_map(unsigned long start, unsigned long size) +{ + unsigned long end = start + size; + + /* Align boundary to 2M. */ + start = round_down(start, PMD_SIZE); + end = round_up(end, PMD_SIZE); + if (start >= end) + return; + + /* Build the mapping. */ + kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, + start, end); +} + /* Locates and clears a region for a new top level page table. */ void initialize_identity_maps(void) { + unsigned long start, size; + /* If running as an SEV guest, the encryption mask is required. */ set_sev_encryption_mask(); @@ -121,37 +143,24 @@ void initialize_identity_maps(void) */ top_level_pgt = read_cr3_pa(); if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) { - debug_putstr("booted via startup_32()\n"); pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); } else { - debug_putstr("booted via startup_64()\n"); pgt_data.pgt_buf = _pgtable; pgt_data.pgt_buf_size = BOOT_PGT_SIZE; memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data); } -} -/* - * Adds the specified range to what will become the new identity mappings. - * Once all ranges have been added, the new mapping is activated by calling - * finalize_identity_maps() below. - */ -void add_identity_map(unsigned long start, unsigned long size) -{ - unsigned long end = start + size; - - /* Align boundary to 2M. */ - start = round_down(start, PMD_SIZE); - end = round_up(end, PMD_SIZE); - if (start >= end) - return; - - /* Build the mapping. */ - kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, - start, end); + /* + * New page-table is set up - map the kernel image and load it + * into cr3. + */ + start = (unsigned long)_head; + size = _end - _head; + add_identity_map(start, size); + write_cr3(top_level_pgt); } /* diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e27de98ed038..82662869c4cb 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -861,9 +861,6 @@ void choose_random_location(unsigned long input, boot_params->hdr.loadflags |= KASLR_FLAG; - /* Prepare to add new identity pagetables on demand. */ - initialize_identity_maps(); - if (IS_ENABLED(CONFIG_X86_32)) mem_limit = KERNEL_IMAGE_SIZE; else -- cgit v1.2.3-70-g09d2 From 8570978ea030757839747aa9944ea576708be3d4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:18 +0200 Subject: x86/boot/compressed/64: Don't pre-map memory in KASLR code With the page-fault handler in place, the identity mapping can be built on-demand. So remove the code which manually creates the mappings and unexport/remove the functions used for it. 
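For illustration (editorial sketch, not taken from the patch): once faults are handled on demand, the #PF handler only needs to map the 2M (PMD_SIZE) region around the faulting address, so the pre-mapping removed here becomes redundant. A stand-alone C sketch of the alignment arithmetic involved, with a made-up fault address:

#include <stdio.h>

#define PMD_SHIFT	21
#define PMD_SIZE	(1UL << PMD_SHIFT)	/* 2M */
#define PMD_MASK	(~(PMD_SIZE - 1))

int main(void)
{
	unsigned long fault_addr = 0x1234567UL;		/* made-up CR2 value */
	unsigned long start = fault_addr & PMD_MASK;	/* round down to a 2M boundary */
	unsigned long end   = start + PMD_SIZE;		/* cover one full 2M region */

	printf("fault at %#lx -> map %#lx-%#lx\n", fault_addr, start, end);
	return 0;
}
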
Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200907131613.12703-18-joro@8bytes.org --- arch/x86/boot/compressed/ident_map_64.c | 6 ++---- arch/x86/boot/compressed/kaslr.c | 24 +----------------------- arch/x86/boot/compressed/misc.h | 10 ---------- 3 files changed, 3 insertions(+), 37 deletions(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index ecf9353b064d..c63257bf8373 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -87,11 +87,9 @@ phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; static struct x86_mapping_info mapping_info; /* - * Adds the specified range to what will become the new identity mappings. - * Once all ranges have been added, the new mapping is activated by calling - * finalize_identity_maps() below. + * Adds the specified range to the identity mappings. */ -void add_identity_map(unsigned long start, unsigned long size) +static void add_identity_map(unsigned long start, unsigned long size) { unsigned long end = start + size; diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 82662869c4cb..b59547ce5b19 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -397,8 +397,6 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, */ mem_avoid[MEM_AVOID_ZO_RANGE].start = input; mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; - add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start, - mem_avoid[MEM_AVOID_ZO_RANGE].size); /* Avoid initrd. */ initrd_start = (u64)boot_params->ext_ramdisk_image << 32; @@ -416,15 +414,11 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1; mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; - add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start, - mem_avoid[MEM_AVOID_CMDLINE].size); } /* Avoid boot parameters. */ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); - add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start, - mem_avoid[MEM_AVOID_BOOTPARAMS].size); /* We don't need to set a mapping for setup_data. */ @@ -433,11 +427,6 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* Enumerate the immovable memory regions */ num_immovable_mem = count_immovable_mem_regions(); - -#ifdef CONFIG_X86_VERBOSE_BOOTUP - /* Make sure video RAM can be used. */ - add_identity_map(0, PMD_SIZE); -#endif } /* @@ -884,19 +873,8 @@ void choose_random_location(unsigned long input, warn("Physical KASLR disabled: no suitable memory region!"); } else { /* Update the new physical address location. */ - if (*output != random_addr) { - add_identity_map(random_addr, output_size); + if (*output != random_addr) *output = random_addr; - } - - /* - * This loads the identity mapping page table. - * This should only be done if a new physical address - * is found for the kernel, otherwise we should keep - * the old page table to make it be like the "nokaslr" - * case. 
- */ - finalize_identity_maps(); } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index f0e199174c5f..9840c82a39f1 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -98,17 +98,7 @@ static inline void choose_random_location(unsigned long input, #endif #ifdef CONFIG_X86_64 -void initialize_identity_maps(void); -void add_identity_map(unsigned long start, unsigned long size); -void finalize_identity_maps(void); extern unsigned char _pgtable[]; -#else -static inline void initialize_identity_maps(void) -{ } -static inline void add_identity_map(unsigned long start, unsigned long size) -{ } -static inline void finalize_identity_maps(void) -{ } #endif #ifdef CONFIG_EARLY_PRINTK -- cgit v1.2.3-70-g09d2 From 21cf2372618ef167d8c4ae04880fb873b55b2daa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:19 +0200 Subject: x86/boot/compressed/64: Change add_identity_map() to take start and end Changing the function to take start and end as parameters instead of start and size simplifies the callers which don't need to calculate the size if they already have start and end. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200907131613.12703-19-joro@8bytes.org --- arch/x86/boot/compressed/ident_map_64.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index c63257bf8373..62e42c11a336 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -89,10 +89,8 @@ static struct x86_mapping_info mapping_info; /* * Adds the specified range to the identity mappings. */ -static void add_identity_map(unsigned long start, unsigned long size) +static void add_identity_map(unsigned long start, unsigned long end) { - unsigned long end = start + size; - /* Align boundary to 2M. */ start = round_down(start, PMD_SIZE); end = round_up(end, PMD_SIZE); @@ -107,8 +105,6 @@ static void add_identity_map(unsigned long start, unsigned long size) /* Locates and clears a region for a new top level page table. */ void initialize_identity_maps(void) { - unsigned long start, size; - /* If running as an SEV guest, the encryption mask is required. */ set_sev_encryption_mask(); @@ -155,9 +151,7 @@ void initialize_identity_maps(void) * New page-table is set up - map the kernel image and load it * into cr3. */ - start = (unsigned long)_head; - size = _end - _head; - add_identity_map(start, size); + add_identity_map((unsigned long)_head, (unsigned long)_end); write_cr3(top_level_pgt); } @@ -189,7 +183,8 @@ static void do_pf_error(const char *msg, unsigned long error_code, void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) { - unsigned long address = native_read_cr2(); + unsigned long address = native_read_cr2() & PMD_MASK; + unsigned long end = address + PMD_SIZE; /* * Check for unexpected error codes. Unexpected are: @@ -204,5 +199,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) * Error code is sane - now identity map the 2M region around * the faulting address. 
*/ - add_identity_map(address & PMD_MASK, PMD_SIZE); + add_identity_map(address, end); } -- cgit v1.2.3-70-g09d2 From 29dcc60f6a19fb0aaee97bd1ae2ed8a7dc6f0cfe Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:20 +0200 Subject: x86/boot/compressed/64: Add stage1 #VC handler Add the first handler for #VC exceptions. At stage 1 there is no GHCB yet because the kernel might still be running on the EFI page table. The stage 1 handler is limited to the MSR-based protocol to talk to the hypervisor and can only support CPUID exit-codes, but that is enough to get to stage 2. [ bp: Zap superfluous newlines after rd/wrmsr instruction mnemonics. ] Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-20-joro@8bytes.org --- arch/x86/boot/compressed/Makefile | 1 + arch/x86/boot/compressed/idt_64.c | 4 ++ arch/x86/boot/compressed/idt_handlers_64.S | 4 ++ arch/x86/boot/compressed/misc.h | 1 + arch/x86/boot/compressed/sev-es.c | 45 ++++++++++++++++++++ arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/sev-es.h | 37 +++++++++++++++++ arch/x86/include/asm/trapnr.h | 1 + arch/x86/kernel/sev-es-shared.c | 66 ++++++++++++++++++++++++++++++ 9 files changed, 160 insertions(+) create mode 100644 arch/x86/boot/compressed/sev-es.c create mode 100644 arch/x86/include/asm/sev-es.h create mode 100644 arch/x86/kernel/sev-es-shared.c (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index e7f3eba99ea2..38f4a52a4eda 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -88,6 +88,7 @@ ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index 5f083092a86d..f3ca7324be44 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -32,6 +32,10 @@ void load_stage1_idt(void) { boot_idt_desc.address = (unsigned long)boot_idt; + + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) + set_idt_entry(X86_TRAP_VC, boot_stage1_vc); + load_boot_idt(&boot_idt_desc); } diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S index b20e57504a94..92eb4df478a1 100644 --- a/arch/x86/boot/compressed/idt_handlers_64.S +++ b/arch/x86/boot/compressed/idt_handlers_64.S @@ -70,3 +70,7 @@ SYM_FUNC_END(\name) .code64 EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1 + +#ifdef CONFIG_AMD_MEM_ENCRYPT +EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1 +#endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9840c82a39f1..eaa8b45ebccb 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -141,5 +141,6 @@ extern struct desc_ptr boot_idt_desc; /* IDT Entry Points */ void boot_page_fault(void); +void boot_stage1_vc(void); #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c new file mode 100644 index 000000000000..99c3bcd4d61f --- /dev/null +++ b/arch/x86/boot/compressed/sev-es.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + */ + +/* + * misc.h needs to be first because 
it knows how to include the other kernel + * headers in the pre-decompression code in a way that does not break + * compilation. + */ +#include "misc.h" + +#include +#include +#include +#include + +static inline u64 sev_es_rd_ghcb_msr(void) +{ + unsigned long low, high; + + asm volatile("rdmsr" : "=a" (low), "=d" (high) : + "c" (MSR_AMD64_SEV_ES_GHCB)); + + return ((high << 32) | low); +} + +static inline void sev_es_wr_ghcb_msr(u64 val) +{ + u32 low, high; + + low = val & 0xffffffffUL; + high = val >> 32; + + asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB), + "a"(low), "d" (high) : "memory"); +} + +#undef __init +#define __init + +/* Include code for early handlers */ +#include "../../kernel/sev-es-shared.c" diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2859ee4f39a8..da34fdba7c5a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -466,6 +466,7 @@ #define MSR_AMD64_IBSBRTARGET 0xc001103b #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h new file mode 100644 index 000000000000..48a44038b5d1 --- /dev/null +++ b/arch/x86/include/asm/sev-es.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + */ + +#ifndef __ASM_ENCRYPTED_STATE_H +#define __ASM_ENCRYPTED_STATE_H + +#include + +#define GHCB_SEV_CPUID_REQ 0x004UL +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ + (((unsigned long)reg & 3) << 30) | \ + (((unsigned long)fn) << 32)) + +#define GHCB_SEV_CPUID_RESP 0x005UL +#define GHCB_SEV_TERMINATE 0x100UL + +#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) +#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } + +void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); + +static inline u64 lower_bits(u64 val, unsigned int bits) +{ + u64 mask = (1ULL << bits) - 1; + + return (val & mask); +} + +#endif diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h index 082f45631fa9..f5d2325aa0b7 100644 --- a/arch/x86/include/asm/trapnr.h +++ b/arch/x86/include/asm/trapnr.h @@ -26,6 +26,7 @@ #define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */ #define X86_TRAP_VE 20 /* Virtualization Exception */ #define X86_TRAP_CP 21 /* Control Protection Exception */ +#define X86_TRAP_VC 29 /* VMM Communication Exception */ #define X86_TRAP_IRET 32 /* IRET Exception */ #endif diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c new file mode 100644 index 000000000000..0bea32341afa --- /dev/null +++ b/arch/x86/kernel/sev-es-shared.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + * + * This file is not compiled stand-alone. It contains code shared + * between the pre-decompression boot code and the running Linux kernel + * and is included directly into both code-bases. + */ + +/* + * Boot VC Handler - This is the first VC handler during boot, there is no GHCB + * page yet, so it only supports the MSR based communication with the + * hypervisor and only the CPUID exit-code. 
+ */ +void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +{ + unsigned int fn = lower_bits(regs->ax, 32); + unsigned long val; + + /* Only CPUID is supported via MSR protocol */ + if (exit_code != SVM_EXIT_CPUID) + goto fail; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->ax = val >> 32; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->bx = val >> 32; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->cx = val >> 32; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->dx = val >> 32; + + /* Skip over the CPUID two-byte opcode */ + regs->ip += 2; + + return; + +fail: + sev_es_wr_ghcb_msr(GHCB_SEV_TERMINATE); + VMGEXIT(); + + /* Shouldn't get here - if we do halt the machine */ + while (true) + asm volatile("hlt\n"); +} -- cgit v1.2.3-70-g09d2 From c2a0304a286f386e45cea3f4b0617f0813de67fd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:21 +0200 Subject: x86/boot/compressed/64: Call set_sev_encryption_mask() earlier Call set_sev_encryption_mask() while still on the stage 1 #VC-handler because the stage 2 handler needs the kernel's own page tables to be set up, to which calling set_sev_encryption_mask() is a prerequisite. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-21-joro@8bytes.org --- arch/x86/boot/compressed/head_64.S | 9 ++++++++- arch/x86/boot/compressed/ident_map_64.c | 3 --- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fb6c0392306b..42190c00d9c2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -543,9 +543,16 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) rep stosq /* - * Load stage2 IDT and switch to our own page-table + * If running as an SEV guest, the encryption mask is required in the + * page-table setup code below. When the guest also has SEV-ES enabled + * set_sev_encryption_mask() will cause #VC exceptions, but the stage2 + * handler can't map its GHCB because the page-table is not set up yet. + * So set up the encryption mask here while still on the stage1 #VC + * handler. Then load stage2 IDT and switch to the kernel's own + * page-table. */ pushq %rsi + call set_sev_encryption_mask call load_stage2_idt call initialize_identity_maps popq %rsi diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 62e42c11a336..b4f2a5f503cd 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -105,9 +105,6 @@ static void add_identity_map(unsigned long start, unsigned long end) /* Locates and clears a region for a new top level page table. */ void initialize_identity_maps(void) { - /* If running as an SEV guest, the encryption mask is required. 
*/ - set_sev_encryption_mask(); - /* Exclude the encryption mask from __PHYSICAL_MASK */ physical_mask &= ~sme_me_mask; -- cgit v1.2.3-70-g09d2 From 4b3fdca64a7e8ad90c87cad1fbc6991471f48dc7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:22 +0200 Subject: x86/boot/compressed/64: Check return value of kernel_ident_mapping_init() The function can fail to create an identity mapping, check for that and bail out if it happens. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-22-joro@8bytes.org --- arch/x86/boot/compressed/ident_map_64.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index b4f2a5f503cd..aa91bebc0fe9 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -91,6 +91,8 @@ static struct x86_mapping_info mapping_info; */ static void add_identity_map(unsigned long start, unsigned long end) { + int ret; + /* Align boundary to 2M. */ start = round_down(start, PMD_SIZE); end = round_up(end, PMD_SIZE); @@ -98,8 +100,9 @@ static void add_identity_map(unsigned long start, unsigned long end) return; /* Build the mapping. */ - kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, - start, end); + ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end); + if (ret) + error("Error: kernel_ident_mapping_init() failed\n"); } /* Locates and clears a region for a new top level page table. */ -- cgit v1.2.3-70-g09d2 From c81d60029a1393183d2125fcb4b64831629b8864 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:23 +0200 Subject: x86/boot/compressed/64: Add set_page_en/decrypted() helpers The functions are needed to map the GHCB for SEV-ES guests. The GHCB is used for communication with the hypervisor, so its content must not be encrypted. After the GHCB is not needed anymore it must be mapped encrypted again so that the running kernel image can safely re-use the memory. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-23-joro@8bytes.org --- arch/x86/boot/compressed/ident_map_64.c | 133 ++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 2 + 2 files changed, 135 insertions(+) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index aa91bebc0fe9..05742f641a06 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -24,6 +24,7 @@ /* These actually do the work of building the kernel identity maps. */ #include +#include #include #include #include @@ -165,6 +166,138 @@ void finalize_identity_maps(void) write_cr3(top_level_pgt); } +static pte_t *split_large_pmd(struct x86_mapping_info *info, + pmd_t *pmdp, unsigned long __address) +{ + unsigned long page_flags; + unsigned long address; + pte_t *pte; + pmd_t pmd; + int i; + + pte = (pte_t *)info->alloc_pgt_page(info->context); + if (!pte) + return NULL; + + address = __address & PMD_MASK; + /* No large page - clear PSE flag */ + page_flags = info->page_flag & ~_PAGE_PSE; + + /* Populate the PTEs */ + for (i = 0; i < PTRS_PER_PMD; i++) { + set_pte(&pte[i], __pte(address | page_flags)); + address += PAGE_SIZE; + } + + /* + * Ideally we need to clear the large PMD first and do a TLB + * flush before we write the new PMD. 
But the 2M range of the + * PMD might contain the code we execute and/or the stack + * we are on, so we can't do that. But that should be safe here + * because we are going from large to small mappings and we are + * also the only user of the page-table, so there is no chance + * of a TLB multihit. + */ + pmd = __pmd((unsigned long)pte | info->kernpg_flag); + set_pmd(pmdp, pmd); + /* Flush TLB to establish the new PMD */ + write_cr3(top_level_pgt); + + return pte + pte_index(__address); +} + +static void clflush_page(unsigned long address) +{ + unsigned int flush_size; + char *cl, *start, *end; + + /* + * Hardcode cl-size to 64 - CPUID can't be used here because that might + * cause another #VC exception and the GHCB is not ready to use yet. + */ + flush_size = 64; + start = (char *)(address & PAGE_MASK); + end = start + PAGE_SIZE; + + /* + * First make sure there are no pending writes on the cache-lines to + * flush. + */ + asm volatile("mfence" : : : "memory"); + + for (cl = start; cl != end; cl += flush_size) + clflush(cl); +} + +static int set_clr_page_flags(struct x86_mapping_info *info, + unsigned long address, + pteval_t set, pteval_t clr) +{ + pgd_t *pgdp = (pgd_t *)top_level_pgt; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep, pte; + + /* + * First make sure there is a PMD mapping for 'address'. + * It should already exist, but keep things generic. + * + * To map the page just read from it and fault it in if there is no + * mapping yet. add_identity_map() can't be called here because that + * would unconditionally map the address on PMD level, destroying any + * PTE-level mappings that might already exist. Use assembly here so + * the access won't be optimized away. + */ + asm volatile("mov %[address], %%r9" + :: [address] "g" (*(unsigned long *)address) + : "r9", "memory"); + + /* + * The page is mapped at least with PMD size - so skip checks and walk + * directly to the PMD. + */ + p4dp = p4d_offset(pgdp, address); + pudp = pud_offset(p4dp, address); + pmdp = pmd_offset(pudp, address); + + if (pmd_large(*pmdp)) + ptep = split_large_pmd(info, pmdp, address); + else + ptep = pte_offset_kernel(pmdp, address); + + if (!ptep) + return -ENOMEM; + + /* + * Changing encryption attributes of a page requires to flush it from + * the caches. 
+ */ + if ((set | clr) & _PAGE_ENC) + clflush_page(address); + + /* Update PTE */ + pte = *ptep; + pte = pte_set_flags(pte, set); + pte = pte_clear_flags(pte, clr); + set_pte(ptep, pte); + + /* Flush TLB after changing encryption attribute */ + write_cr3(top_level_pgt); + + return 0; +} + +int set_page_decrypted(unsigned long address) +{ + return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC); +} + +int set_page_encrypted(unsigned long address) +{ + return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0); +} + static void do_pf_error(const char *msg, unsigned long error_code, unsigned long address, unsigned long ip) { diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index eaa8b45ebccb..01c0fb3417ca 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -98,6 +98,8 @@ static inline void choose_random_location(unsigned long input, #endif #ifdef CONFIG_X86_64 +extern int set_page_decrypted(unsigned long address); +extern int set_page_encrypted(unsigned long address); extern unsigned char _pgtable[]; #endif -- cgit v1.2.3-70-g09d2 From 597cfe48212a3f110ab0f918bf59791f453e65b7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:24 +0200 Subject: x86/boot/compressed/64: Setup a GHCB-based VC Exception handler Install an exception handler for #VC exception that uses a GHCB. Also add the infrastructure for handling different exit-codes by decoding the instruction that caused the exception and error handling. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-24-joro@8bytes.org --- arch/x86/Kconfig | 1 + arch/x86/boot/compressed/Makefile | 5 + arch/x86/boot/compressed/idt_64.c | 4 + arch/x86/boot/compressed/idt_handlers_64.S | 3 +- arch/x86/boot/compressed/misc.c | 7 ++ arch/x86/boot/compressed/misc.h | 7 ++ arch/x86/boot/compressed/sev-es.c | 111 +++++++++++++++++++++ arch/x86/include/asm/sev-es.h | 39 ++++++++ arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kernel/sev-es-shared.c | 154 +++++++++++++++++++++++++++++ 10 files changed, 331 insertions(+), 1 deletion(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7101ac64bb20..8289dd44efbd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1521,6 +1521,7 @@ config AMD_MEM_ENCRYPT select DYNAMIC_PHYSICAL_MASK select ARCH_USE_MEMREMAP_PROT select ARCH_HAS_FORCE_DMA_UNENCRYPTED + select INSTRUCTION_DECODER help Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 38f4a52a4eda..c01236ae1b7c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -44,6 +44,11 @@ KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += -D__DISABLE_EXPORTS +# sev-es.c indirectly inludes inat-table.h which is generated during +# compilation and stored in $(objtree). Add the directory to the includes so +# that the compiler finds it even with out-of-tree builds (make O=/some/path). 
+CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/ + KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n UBSAN_SANITIZE :=n diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index f3ca7324be44..804a502ee0d2 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -46,5 +46,9 @@ void load_stage2_idt(void) set_idt_entry(X86_TRAP_PF, boot_page_fault); +#ifdef CONFIG_AMD_MEM_ENCRYPT + set_idt_entry(X86_TRAP_VC, boot_stage2_vc); +#endif + load_boot_idt(&boot_idt_desc); } diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S index 92eb4df478a1..22890e199f5b 100644 --- a/arch/x86/boot/compressed/idt_handlers_64.S +++ b/arch/x86/boot/compressed/idt_handlers_64.S @@ -72,5 +72,6 @@ SYM_FUNC_END(\name) EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1 #ifdef CONFIG_AMD_MEM_ENCRYPT -EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1 +EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1 +EXCEPTION_HANDLER boot_stage2_vc do_boot_stage2_vc error_code=1 #endif diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index e478e40fbe5a..267e7f93050e 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -442,6 +442,13 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, parse_elf(output); handle_relocations(output, output_len, virt_addr); debug_putstr("done.\nBooting the kernel.\n"); + + /* + * Flush GHCB from cache and map it encrypted again when running as + * SEV-ES guest. + */ + sev_es_shutdown_ghcb(); + return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 01c0fb3417ca..9995c70ca813 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -115,6 +115,12 @@ static inline void console_init(void) void set_sev_encryption_mask(void); +#ifdef CONFIG_AMD_MEM_ENCRYPT +void sev_es_shutdown_ghcb(void); +#else +static inline void sev_es_shutdown_ghcb(void) { } +#endif + /* acpi.c */ #ifdef CONFIG_ACPI acpi_physical_address get_rsdp_addr(void); @@ -144,5 +150,6 @@ extern struct desc_ptr boot_idt_desc; /* IDT Entry Points */ void boot_page_fault(void); void boot_stage1_vc(void); +void boot_stage2_vc(void); #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 99c3bcd4d61f..fa62af771dd5 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -13,10 +13,17 @@ #include "misc.h" #include +#include +#include #include #include #include +#include "error.h" + +struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); +struct ghcb *boot_ghcb; + static inline u64 sev_es_rd_ghcb_msr(void) { unsigned long low, high; @@ -38,8 +45,112 @@ static inline void sev_es_wr_ghcb_msr(u64 val) "a"(low), "d" (high) : "memory"); } +static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) +{ + char buffer[MAX_INSN_SIZE]; + enum es_result ret; + + memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); + + insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1); + insn_get_length(&ctxt->insn); + + ret = ctxt->insn.immediate.got ? 
ES_OK : ES_DECODE_FAILED; + + return ret; +} + +static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, + void *dst, char *buf, size_t size) +{ + memcpy(dst, buf, size); + + return ES_OK; +} + +static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, + void *src, char *buf, size_t size) +{ + memcpy(buf, src, size); + + return ES_OK; +} + #undef __init +#undef __pa #define __init +#define __pa(x) ((unsigned long)(x)) + +#define __BOOT_COMPRESSED + +/* Basic instruction decoding support needed */ +#include "../../lib/inat.c" +#include "../../lib/insn.c" /* Include code for early handlers */ #include "../../kernel/sev-es-shared.c" + +static bool early_setup_sev_es(void) +{ + if (!sev_es_negotiate_protocol()) + sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED); + + if (set_page_decrypted((unsigned long)&boot_ghcb_page)) + return false; + + /* Page is now mapped decrypted, clear it */ + memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page)); + + boot_ghcb = &boot_ghcb_page; + + /* Initialize lookup tables for the instruction decoder */ + inat_init_tables(); + + return true; +} + +void sev_es_shutdown_ghcb(void) +{ + if (!boot_ghcb) + return; + + /* + * GHCB Page must be flushed from the cache and mapped encrypted again. + * Otherwise the running kernel will see strange cache effects when + * trying to use that page. + */ + if (set_page_encrypted((unsigned long)&boot_ghcb_page)) + error("Can't map GHCB page encrypted"); +} + +void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) +{ + struct es_em_ctxt ctxt; + enum es_result result; + + if (!boot_ghcb && !early_setup_sev_es()) + sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + + vc_ghcb_invalidate(boot_ghcb); + result = vc_init_em_ctxt(&ctxt, regs, exit_code); + if (result != ES_OK) + goto finish; + + switch (exit_code) { + default: + result = ES_UNSUPPORTED; + break; + } + +finish: + if (result == ES_OK) { + vc_finish_insn(&ctxt); + } else if (result != ES_RETRY) { + /* + * For now, just halt the machine. That makes debugging easier, + * later we just call sev_es_terminate() here. 
+ */ + while (true) + asm volatile("hlt\n"); + } +} diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h index 48a44038b5d1..6dc52440c4b4 100644 --- a/arch/x86/include/asm/sev-es.h +++ b/arch/x86/include/asm/sev-es.h @@ -9,7 +9,14 @@ #define __ASM_ENCRYPTED_STATE_H #include +#include +#define GHCB_SEV_INFO 0x001UL +#define GHCB_SEV_INFO_REQ 0x002UL +#define GHCB_INFO(v) ((v) & 0xfffUL) +#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) +#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) +#define GHCB_PROTO_OUR 0x0001UL #define GHCB_SEV_CPUID_REQ 0x004UL #define GHCB_CPUID_REQ_EAX 0 #define GHCB_CPUID_REQ_EBX 1 @@ -19,12 +26,44 @@ (((unsigned long)reg & 3) << 30) | \ (((unsigned long)fn) << 32)) +#define GHCB_PROTOCOL_MAX 0x0001UL +#define GHCB_DEFAULT_USAGE 0x0000UL + #define GHCB_SEV_CPUID_RESP 0x005UL #define GHCB_SEV_TERMINATE 0x100UL +#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ + (((((u64)reason_set) & 0x7) << 12) | \ + ((((u64)reason_val) & 0xff) << 16)) +#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 +#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 #define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) #define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } +enum es_result { + ES_OK, /* All good */ + ES_UNSUPPORTED, /* Requested operation not supported */ + ES_VMM_ERROR, /* Unexpected state from the VMM */ + ES_DECODE_FAILED, /* Instruction decoding failed */ + ES_EXCEPTION, /* Instruction caused exception */ + ES_RETRY, /* Retry instruction emulation */ +}; + +struct es_fault_info { + unsigned long vector; + unsigned long error_code; + unsigned long cr2; +}; + +struct pt_regs; + +/* ES instruction emulation context */ +struct es_em_ctxt { + struct pt_regs *regs; + struct insn insn; + struct es_fault_info fi; +}; + void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); static inline u64 lower_bits(u64 val, unsigned int bits) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 2e8a30f06c74..c68d1618c9b0 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 0bea32341afa..7ac6e6b0ae57 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -9,6 +9,118 @@ * and is included directly into both code-bases. */ +static void sev_es_terminate(unsigned int reason) +{ + u64 val = GHCB_SEV_TERMINATE; + + /* + * Tell the hypervisor what went wrong - only reason-set 0 is + * currently supported. 
+ */ + val |= GHCB_SEV_TERMINATE_REASON(0, reason); + + /* Request Guest Termination from Hypvervisor */ + sev_es_wr_ghcb_msr(val); + VMGEXIT(); + + while (true) + asm volatile("hlt\n" : : : "memory"); +} + +static bool sev_es_negotiate_protocol(void) +{ + u64 val; + + /* Do the GHCB protocol version negotiation */ + sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + + if (GHCB_INFO(val) != GHCB_SEV_INFO) + return false; + + if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || + GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) + return false; + + return true; +} + +static void vc_ghcb_invalidate(struct ghcb *ghcb) +{ + memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); +} + +static bool vc_decoding_needed(unsigned long exit_code) +{ + /* Exceptions don't require to decode the instruction */ + return !(exit_code >= SVM_EXIT_EXCP_BASE && + exit_code <= SVM_EXIT_LAST_EXCP); +} + +static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt, + struct pt_regs *regs, + unsigned long exit_code) +{ + enum es_result ret = ES_OK; + + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->regs = regs; + + if (vc_decoding_needed(exit_code)) + ret = vc_decode_insn(ctxt); + + return ret; +} + +static void vc_finish_insn(struct es_em_ctxt *ctxt) +{ + ctxt->regs->ip += ctxt->insn.length; +} + +static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, + struct es_em_ctxt *ctxt, + u64 exit_code, u64 exit_info_1, + u64 exit_info_2) +{ + enum es_result ret; + + /* Fill in protocol and format specifiers */ + ghcb->protocol_version = GHCB_PROTOCOL_MAX; + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, exit_code); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) { + u64 info = ghcb->save.sw_exit_info_2; + unsigned long v; + + info = ghcb->save.sw_exit_info_2; + v = info & SVM_EVTINJ_VEC_MASK; + + /* Check if exception information from hypervisor is sane. */ + if ((info & SVM_EVTINJ_VALID) && + ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) && + ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) { + ctxt->fi.vector = v; + if (info & SVM_EVTINJ_VALID_ERR) + ctxt->fi.error_code = info >> 32; + ret = ES_EXCEPTION; + } else { + ret = ES_VMM_ERROR; + } + } else { + ret = ES_OK; + } + + return ret; +} + /* * Boot VC Handler - This is the first VC handler during boot, there is no GHCB * page yet, so it only supports the MSR based communication with the @@ -64,3 +176,45 @@ fail: while (true) asm volatile("hlt\n"); } + +static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, + void *src, char *buf, + unsigned int data_size, + unsigned int count, + bool backwards) +{ + int i, b = backwards ? -1 : 1; + enum es_result ret = ES_OK; + + for (i = 0; i < count; i++) { + void *s = src + (i * data_size * b); + char *d = buf + (i * data_size); + + ret = vc_read_mem(ctxt, s, d, data_size); + if (ret != ES_OK) + break; + } + + return ret; +} + +static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, + void *dst, char *buf, + unsigned int data_size, + unsigned int count, + bool backwards) +{ + int i, s = backwards ? 
-1 : 1; + enum es_result ret = ES_OK; + + for (i = 0; i < count; i++) { + void *d = dst + (i * data_size * s); + char *b = buf + (i * data_size); + + ret = vc_write_mem(ctxt, d, b, data_size); + if (ret != ES_OK) + break; + } + + return ret; +} -- cgit v1.2.3-70-g09d2 From 69add17a7c1992593a7cf775a66e0256ad4b3ef8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 7 Sep 2020 15:15:25 +0200 Subject: x86/boot/compressed/64: Unmap GHCB page before booting the kernel Force a page-fault on any further accesses to the GHCB page when they shouldn't happen anymore. This will catch any bugs where a #VC exception is raised even though none is expected anymore. Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-25-joro@8bytes.org --- arch/x86/boot/compressed/ident_map_64.c | 17 +++++++++++++++-- arch/x86/boot/compressed/misc.h | 6 ++++++ arch/x86/boot/compressed/sev-es.c | 14 ++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 05742f641a06..063a60edcf99 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -298,6 +298,11 @@ int set_page_encrypted(unsigned long address) return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0); } +int set_page_non_present(unsigned long address) +{ + return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT); +} + static void do_pf_error(const char *msg, unsigned long error_code, unsigned long address, unsigned long ip) { @@ -316,8 +321,14 @@ static void do_pf_error(const char *msg, unsigned long error_code, void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) { - unsigned long address = native_read_cr2() & PMD_MASK; - unsigned long end = address + PMD_SIZE; + unsigned long address = native_read_cr2(); + unsigned long end; + bool ghcb_fault; + + ghcb_fault = sev_es_check_ghcb_fault(address); + + address &= PMD_MASK; + end = address + PMD_SIZE; /* * Check for unexpected error codes. 
Unexpected are: @@ -327,6 +338,8 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) */ if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD)) do_pf_error("Unexpected page-fault:", error_code, address, regs->ip); + else if (ghcb_fault) + do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip); /* * Error code is sane - now identity map the 2M region around diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9995c70ca813..c0e0ffeee50a 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -100,6 +100,7 @@ static inline void choose_random_location(unsigned long input, #ifdef CONFIG_X86_64 extern int set_page_decrypted(unsigned long address); extern int set_page_encrypted(unsigned long address); +extern int set_page_non_present(unsigned long address); extern unsigned char _pgtable[]; #endif @@ -117,8 +118,13 @@ void set_sev_encryption_mask(void); #ifdef CONFIG_AMD_MEM_ENCRYPT void sev_es_shutdown_ghcb(void); +extern bool sev_es_check_ghcb_fault(unsigned long address); #else static inline void sev_es_shutdown_ghcb(void) { } +static inline bool sev_es_check_ghcb_fault(unsigned long address) +{ + return false; +} #endif /* acpi.c */ diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index fa62af771dd5..1e1fab583302 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -121,6 +121,20 @@ void sev_es_shutdown_ghcb(void) */ if (set_page_encrypted((unsigned long)&boot_ghcb_page)) error("Can't map GHCB page encrypted"); + + /* + * GHCB page is mapped encrypted again and flushed from the cache. + * Mark it non-present now to catch bugs when #VC exceptions trigger + * after this point. + */ + if (set_page_non_present((unsigned long)&boot_ghcb_page)) + error("Can't unmap GHCB page"); +} + +bool sev_es_check_ghcb_fault(unsigned long address) +{ + /* Check whether the fault was on the GHCB page */ + return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page); } void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) -- cgit v1.2.3-70-g09d2 From 25189d08e5168c098c307a0eaae5b30c13a331ef Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 7 Sep 2020 15:15:26 +0200 Subject: x86/sev-es: Add support for handling IOIO exceptions Add support for decoding and handling #VC exceptions for IOIO events. [ jroedel@suse.de: Adapted code to #VC handling framework ] Co-developed-by: Joerg Roedel Signed-off-by: Tom Lendacky Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-26-joro@8bytes.org --- arch/x86/boot/compressed/sev-es.c | 32 ++++++ arch/x86/kernel/sev-es-shared.c | 214 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 1e1fab583302..61504eb1ab46 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -24,6 +24,35 @@ struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); struct ghcb *boot_ghcb; +/* + * Copy a version of this function here - insn-eval.c can't be used in + * pre-decompression code. 
+ */ +static bool insn_has_rep_prefix(struct insn *insn) +{ + int i; + + insn_get_prefixes(insn); + + for (i = 0; i < insn->prefixes.nbytes; i++) { + insn_byte_t p = insn->prefixes.bytes[i]; + + if (p == 0xf2 || p == 0xf3) + return true; + } + + return false; +} + +/* + * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and + * doesn't use segments. + */ +static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) +{ + return 0UL; +} + static inline u64 sev_es_rd_ghcb_msr(void) { unsigned long low, high; @@ -151,6 +180,9 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) goto finish; switch (exit_code) { + case SVM_EXIT_IOIO: + result = vc_handle_ioio(boot_ghcb, &ctxt); + break; default: result = ES_UNSUPPORTED; break; diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 7ac6e6b0ae57..bae7cf28455b 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -218,3 +218,217 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, return ret; } + +#define IOIO_TYPE_STR BIT(2) +#define IOIO_TYPE_IN 1 +#define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR) +#define IOIO_TYPE_OUT 0 +#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR) + +#define IOIO_REP BIT(3) + +#define IOIO_ADDR_64 BIT(9) +#define IOIO_ADDR_32 BIT(8) +#define IOIO_ADDR_16 BIT(7) + +#define IOIO_DATA_32 BIT(6) +#define IOIO_DATA_16 BIT(5) +#define IOIO_DATA_8 BIT(4) + +#define IOIO_SEG_ES (0 << 10) +#define IOIO_SEG_DS (3 << 10) + +static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) +{ + struct insn *insn = &ctxt->insn; + *exitinfo = 0; + + switch (insn->opcode.bytes[0]) { + /* INS opcodes */ + case 0x6c: + case 0x6d: + *exitinfo |= IOIO_TYPE_INS; + *exitinfo |= IOIO_SEG_ES; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + /* OUTS opcodes */ + case 0x6e: + case 0x6f: + *exitinfo |= IOIO_TYPE_OUTS; + *exitinfo |= IOIO_SEG_DS; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + /* IN immediate opcodes */ + case 0xe4: + case 0xe5: + *exitinfo |= IOIO_TYPE_IN; + *exitinfo |= (u64)insn->immediate.value << 16; + break; + + /* OUT immediate opcodes */ + case 0xe6: + case 0xe7: + *exitinfo |= IOIO_TYPE_OUT; + *exitinfo |= (u64)insn->immediate.value << 16; + break; + + /* IN register opcodes */ + case 0xec: + case 0xed: + *exitinfo |= IOIO_TYPE_IN; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + /* OUT register opcodes */ + case 0xee: + case 0xef: + *exitinfo |= IOIO_TYPE_OUT; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + default: + return ES_DECODE_FAILED; + } + + switch (insn->opcode.bytes[0]) { + case 0x6c: + case 0x6e: + case 0xe4: + case 0xe6: + case 0xec: + case 0xee: + /* Single byte opcodes */ + *exitinfo |= IOIO_DATA_8; + break; + default: + /* Length determined by instruction parsing */ + *exitinfo |= (insn->opnd_bytes == 2) ? 
IOIO_DATA_16 + : IOIO_DATA_32; + } + switch (insn->addr_bytes) { + case 2: + *exitinfo |= IOIO_ADDR_16; + break; + case 4: + *exitinfo |= IOIO_ADDR_32; + break; + case 8: + *exitinfo |= IOIO_ADDR_64; + break; + } + + if (insn_has_rep_prefix(insn)) + *exitinfo |= IOIO_REP; + + return ES_OK; +} + +static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + struct pt_regs *regs = ctxt->regs; + u64 exit_info_1, exit_info_2; + enum es_result ret; + + ret = vc_ioio_exitinfo(ctxt, &exit_info_1); + if (ret != ES_OK) + return ret; + + if (exit_info_1 & IOIO_TYPE_STR) { + + /* (REP) INS/OUTS */ + + bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF); + unsigned int io_bytes, exit_bytes; + unsigned int ghcb_count, op_count; + unsigned long es_base; + u64 sw_scratch; + + /* + * For the string variants with rep prefix the amount of in/out + * operations per #VC exception is limited so that the kernel + * has a chance to take interrupts and re-schedule while the + * instruction is emulated. + */ + io_bytes = (exit_info_1 >> 4) & 0x7; + ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes; + + op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1; + exit_info_2 = min(op_count, ghcb_count); + exit_bytes = exit_info_2 * io_bytes; + + es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); + + /* Read bytes of OUTS into the shared buffer */ + if (!(exit_info_1 & IOIO_TYPE_IN)) { + ret = vc_insn_string_read(ctxt, + (void *)(es_base + regs->si), + ghcb->shared_buffer, io_bytes, + exit_info_2, df); + if (ret) + return ret; + } + + /* + * Issue an VMGEXIT to the HV to consume the bytes from the + * shared buffer or to have it write them into the shared buffer + * depending on the instruction: OUTS or INS. + */ + sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer); + ghcb_set_sw_scratch(ghcb, sw_scratch); + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, + exit_info_1, exit_info_2); + if (ret != ES_OK) + return ret; + + /* Read bytes from shared buffer into the guest's destination. */ + if (exit_info_1 & IOIO_TYPE_IN) { + ret = vc_insn_string_write(ctxt, + (void *)(es_base + regs->di), + ghcb->shared_buffer, io_bytes, + exit_info_2, df); + if (ret) + return ret; + + if (df) + regs->di -= exit_bytes; + else + regs->di += exit_bytes; + } else { + if (df) + regs->si -= exit_bytes; + else + regs->si += exit_bytes; + } + + if (exit_info_1 & IOIO_REP) + regs->cx -= exit_info_2; + + ret = regs->cx ? ES_RETRY : ES_OK; + + } else { + + /* IN/OUT into/from rAX */ + + int bits = (exit_info_1 & 0x70) >> 1; + u64 rax = 0; + + if (!(exit_info_1 & IOIO_TYPE_IN)) + rax = lower_bits(regs->ax, bits); + + ghcb_set_rax(ghcb, rax); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0); + if (ret != ES_OK) + return ret; + + if (exit_info_1 & IOIO_TYPE_IN) { + if (!ghcb_rax_is_valid(ghcb)) + return ES_VMM_ERROR; + regs->ax = lower_bits(ghcb->save.rax, bits); + } + } + + return ret; +} -- cgit v1.2.3-70-g09d2 From a7de15d489d956217b47671705ac2218ca50eaae Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 7 Sep 2020 15:15:28 +0200 Subject: x86/sev-es: Add CPUID handling to #VC handler Handle #VC exceptions caused by CPUID instructions. These happen in early boot code when the KASLR code checks for RDTSC. 
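(Illustration only, not taken from this patch — the function name below is made up: under SEV-ES any ordinary CPUID feature probe of this kind is intercepted and raises a #VC, for example

	static bool probe_tsc_via_cpuid(void)
	{
		unsigned int eax = 1, ebx, ecx, edx;

		/* CPUID leaf 1; EDX bit 4 is the TSC feature flag */
		native_cpuid(&eax, &ebx, &ecx, &edx);	/* intercepted -> #VC under SEV-ES */

		return edx & (1U << 4);
	}

and the intercepted CPUID is then forwarded to the hypervisor through the GHCB by the handler added below.)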
Signed-off-by: Tom Lendacky [ jroedel@suse.de: Adapt to #VC handling framework ] Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-28-joro@8bytes.org --- arch/x86/boot/compressed/sev-es.c | 4 ++++ arch/x86/kernel/sev-es-shared.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 61504eb1ab46..b1790f487456 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -183,6 +184,9 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) case SVM_EXIT_IOIO: result = vc_handle_ioio(boot_ghcb, &ctxt); break; + case SVM_EXIT_CPUID: + result = vc_handle_cpuid(boot_ghcb, &ctxt); + break; default: result = ES_UNSUPPORTED; break; diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index bae7cf28455b..a6b41910b8ab 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -432,3 +432,38 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } + +static enum es_result vc_handle_cpuid(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + struct pt_regs *regs = ctxt->regs; + u32 cr4 = native_read_cr4(); + enum es_result ret; + + ghcb_set_rax(ghcb, regs->ax); + ghcb_set_rcx(ghcb, regs->cx); + + if (cr4 & X86_CR4_OSXSAVE) + /* Safe to read xcr0 */ + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); + else + /* xgetbv will cause #GP - use reset value for xcr0 */ + ghcb_set_xcr0(ghcb, 1); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && + ghcb_rbx_is_valid(ghcb) && + ghcb_rcx_is_valid(ghcb) && + ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + regs->ax = ghcb->save.rax; + regs->bx = ghcb->save.rbx; + regs->cx = ghcb->save.rcx; + regs->dx = ghcb->save.rdx; + + return ES_OK; +} -- cgit v1.2.3-70-g09d2 From 4711e7acaa125d8cc242f06e1f4d6c74e177454b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 7 Sep 2020 15:15:55 +0200 Subject: x86/sev-es: Handle RDTSC(P) Events Implement a handler for #VC exceptions caused by RDTSC and RDTSCP instructions. Also make it available in the pre-decompression stage because the KASLR code uses RDTSC/RDTSCP to gather entropy and some hypervisors intercept these instructions. 
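(Sketch only, with an assumed helper name mix_in_tsc: the entropy gathering that makes this necessary is along the lines of

	static unsigned long mix_in_tsc(unsigned long seed)
	{
		/* rdtsc() may be intercepted by the hypervisor and then arrive as a #VC */
		return seed ^ (unsigned long)rdtsc();
	}

which is why the RDTSC/RDTSCP exit codes have to be handled already in the pre-decompression stage.)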
Signed-off-by: Tom Lendacky [ jroedel@suse.de: - Adapt to #VC handling infrastructure - Make it available early ] Co-developed-by: Joerg Roedel Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200907131613.12703-55-joro@8bytes.org --- arch/x86/boot/compressed/sev-es.c | 4 ++++ arch/x86/kernel/sev-es-shared.c | 23 +++++++++++++++++++++++ arch/x86/kernel/sev-es.c | 4 ++++ 3 files changed, 31 insertions(+) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index b1790f487456..5f15e5864e0c 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -181,6 +181,10 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) goto finish; switch (exit_code) { + case SVM_EXIT_RDTSC: + case SVM_EXIT_RDTSCP: + result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code); + break; case SVM_EXIT_IOIO: result = vc_handle_ioio(boot_ghcb, &ctxt); break; diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 491b557bdfba..4be8af2f9c57 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -467,3 +467,26 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, return ES_OK; } + +static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, + struct es_em_ctxt *ctxt, + unsigned long exit_code) +{ + bool rdtscp = (exit_code == SVM_EXIT_RDTSCP); + enum es_result ret; + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) && + (!rdtscp || ghcb_rcx_is_valid(ghcb)))) + return ES_VMM_ERROR; + + ctxt->regs->ax = ghcb->save.rax; + ctxt->regs->dx = ghcb->save.rdx; + if (rdtscp) + ctxt->regs->cx = ghcb->save.rcx; + + return ES_OK; +} diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index aba27c3c1633..4d468ec325c3 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -866,6 +866,10 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, case SVM_EXIT_WRITE_DR7: result = vc_handle_dr7_write(ghcb, ctxt); break; + case SVM_EXIT_RDTSC: + case SVM_EXIT_RDTSCP: + result = vc_handle_rdtsc(ghcb, ctxt, exit_code); + break; case SVM_EXIT_CPUID: result = vc_handle_cpuid(ghcb, ctxt); break; -- cgit v1.2.3-70-g09d2 From 39336f4ffb2478ad384075cf4ba7ef2e5db2bbd7 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 7 Sep 2020 15:16:12 +0200 Subject: x86/efi: Add GHCB mappings when SEV-ES is active Calling down to EFI runtime services can result in the firmware performing VMGEXIT calls. The firmware is likely to use the GHCB of the OS (e.g., for setting EFI variables), so each GHCB in the system needs to be identity-mapped in the EFI page tables, as unencrypted, to avoid page faults. 
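(For illustration, reusing helpers introduced earlier in this series: the guest announces its GHCB to the hypervisor by physical address,

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

and OVMF reuses whatever address it finds in that MSR for its own VMGEXITs, so the page behind it has to stay identity-mapped and unencrypted in the EFI page tables for as long as runtime services may be called.)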
Signed-off-by: Tom Lendacky [ jroedel@suse.de: Moved GHCB mapping loop to sev-es.c ] Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Acked-by: Ard Biesheuvel Link: https://lkml.kernel.org/r/20200907131613.12703-72-joro@8bytes.org --- arch/x86/boot/compressed/sev-es.c | 1 + arch/x86/include/asm/sev-es.h | 2 ++ arch/x86/kernel/sev-es.c | 30 ++++++++++++++++++++++++++++++ arch/x86/platform/efi/efi_64.c | 10 ++++++++++ 4 files changed, 43 insertions(+) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 5f15e5864e0c..2a6c7c375244 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -12,6 +12,7 @@ */ #include "misc.h" +#include #include #include #include diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h index e919f09ae33c..cf1d957c7091 100644 --- a/arch/x86/include/asm/sev-es.h +++ b/arch/x86/include/asm/sev-es.h @@ -102,11 +102,13 @@ static __always_inline void sev_es_nmi_complete(void) if (static_branch_unlikely(&sev_es_enable_key)) __sev_es_nmi_complete(); } +extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } +static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } #endif #endif diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index b6518e96dedb..8cac9f80bfc3 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -491,6 +491,36 @@ int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) return 0; } +/* + * This is needed by the OVMF UEFI firmware which will use whatever it finds in + * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu + * runtime GHCBs used by the kernel are also mapped in the EFI page-table. + */ +int __init sev_es_efi_map_ghcbs(pgd_t *pgd) +{ + struct sev_es_runtime_data *data; + unsigned long address, pflags; + int cpu; + u64 pfn; + + if (!sev_es_active()) + return 0; + + pflags = _PAGE_NX | _PAGE_RW; + + for_each_possible_cpu(cpu) { + data = per_cpu(runtime_data, cpu); + + address = __pa(&data->ghcb_page); + pfn = address >> PAGE_SHIFT; + + if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) + return 1; + } + + return 0; +} + static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct pt_regs *regs = ctxt->regs; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6af4da1149ba..8f5759df7776 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,6 +47,7 @@ #include #include #include +#include /* * We allocate runtime services regions top-down, starting from -4G, i.e. @@ -229,6 +230,15 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } + /* + * When SEV-ES is active, the GHCB as set by the kernel will be used + * by firmware. Create a 1:1 unencrypted mapping for each GHCB. + */ + if (sev_es_efi_map_ghcbs(pgd)) { + pr_err("Failed to create 1:1 mapping for the GHCBs!\n"); + return 1; + } + /* * When making calls to the firmware everything needs to be 1:1 * mapped and addressable with 32-bit pointers. 
Map the kernel -- cgit v1.2.3-70-g09d2 From f5ed777586e08e09c4b6f1e87161a145ee1431cf Mon Sep 17 00:00:00 2001 From: Martin Radev Date: Mon, 7 Sep 2020 15:16:13 +0200 Subject: x86/sev-es: Check required CPU features for SEV-ES Make sure the machine supports RDRAND, otherwise there is no trusted source of randomness in the system. To also check this in the pre-decompression stage, make has_cpuflag() not depend on CONFIG_RANDOMIZE_BASE anymore. Signed-off-by: Martin Radev Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200907131613.12703-73-joro@8bytes.org --- arch/x86/boot/compressed/cpuflags.c | 4 ---- arch/x86/boot/compressed/misc.h | 5 +++-- arch/x86/boot/compressed/sev-es.c | 3 +++ arch/x86/kernel/sev-es-shared.c | 15 +++++++++++++++ arch/x86/kernel/sev-es.c | 3 +++ 5 files changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/x86/boot/compressed') diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c index 6448a8196d32..0cc1323896d1 100644 --- a/arch/x86/boot/compressed/cpuflags.c +++ b/arch/x86/boot/compressed/cpuflags.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#ifdef CONFIG_RANDOMIZE_BASE - #include "../cpuflags.c" bool has_cpuflag(int flag) @@ -9,5 +7,3 @@ bool has_cpuflag(int flag) return test_bit(flag, cpu.flags); } - -#endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index c0e0ffeee50a..6d31f1b4c4d1 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -85,8 +85,6 @@ void choose_random_location(unsigned long input, unsigned long *output, unsigned long output_size, unsigned long *virt_addr); -/* cpuflags.c */ -bool has_cpuflag(int flag); #else static inline void choose_random_location(unsigned long input, unsigned long input_size, @@ -97,6 +95,9 @@ static inline void choose_random_location(unsigned long input, } #endif +/* cpuflags.c */ +bool has_cpuflag(int flag); + #ifdef CONFIG_X86_64 extern int set_page_decrypted(unsigned long address); extern int set_page_encrypted(unsigned long address); diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 2a6c7c375244..954cb2702e23 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -145,6 +145,9 @@ void sev_es_shutdown_ghcb(void) if (!boot_ghcb) return; + if (!sev_es_check_cpu_features()) + error("SEV-ES CPU Features missing."); + /* * GHCB Page must be flushed from the cache and mapped encrypted again. * Otherwise the running kernel will see strange cache effects when diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 4be8af2f9c57..5f83ccaab877 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -9,6 +9,21 @@ * and is included directly into both code-bases. 
*/ +#ifndef __BOOT_COMPRESSED +#define error(v) pr_err(v) +#define has_cpuflag(f) boot_cpu_has(f) +#endif + +static bool __init sev_es_check_cpu_features(void) +{ + if (!has_cpuflag(X86_FEATURE_RDRAND)) { + error("RDRAND instruction not supported - no trusted source of randomness available\n"); + return false; + } + + return true; +} + static void sev_es_terminate(unsigned int reason) { u64 val = GHCB_SEV_TERMINATE; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 8cac9f80bfc3..6fcfdd32769f 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -665,6 +665,9 @@ void __init sev_es_init_vc_handling(void) if (!sev_es_active()) return; + if (!sev_es_check_cpu_features()) + panic("SEV-ES CPU Features missing"); + /* Enable SEV-ES special handling */ static_branch_enable(&sev_es_enable_key); -- cgit v1.2.3-70-g09d2