From f62995c92a29e4d9331382b8b2461eef3b9c7c6b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 9 Jul 2017 20:37:39 +0800 Subject: x86/boot/KASLR: Wrap e820 entries walking code into new function process_e820_entries() The original function process_e820_entry() only takes care of each e820 entry passed. And move the E820_TYPE_RAM checking logic into process_e820_entries(). And remove the redundant local variable 'addr' definition in find_random_phys_addr(). Signed-off-by: Baoquan He Acked-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: fanc.fnst@cn.fujitsu.com Cc: izumi.taku@jp.fujitsu.com Cc: matt@codeblueprint.co.uk Cc: thgarnie@google.com Link: http://lkml.kernel.org/r/1499603862-11516-2-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 91f27ab970ef..1485f48aeda1 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -488,10 +488,6 @@ static void process_e820_entry(struct boot_e820_entry *entry, unsigned long start_orig, end; struct boot_e820_entry cur_entry; - /* Skip non-RAM entries. */ - if (entry->type != E820_TYPE_RAM) - return; - /* On 32-bit, ignore entries entirely above our maximum. */ if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE) return; @@ -562,12 +558,29 @@ static void process_e820_entry(struct boot_e820_entry *entry, } } -static unsigned long find_random_phys_addr(unsigned long minimum, - unsigned long image_size) +static void process_e820_entries(unsigned long minimum, + unsigned long image_size) { int i; - unsigned long addr; + struct boot_e820_entry *entry; + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < boot_params->e820_entries; i++) { + entry = &boot_params->e820_table[i]; + /* Skip non-RAM entries. 
*/ + if (entry->type != E820_TYPE_RAM) + continue; + process_e820_entry(entry, minimum, image_size); + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted e820 scan (slot_areas full)!\n"); + break; + } + } +} +static unsigned long find_random_phys_addr(unsigned long minimum, + unsigned long image_size) +{ /* Check if we had too many memmaps. */ if (memmap_too_large) { debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n"); @@ -577,16 +590,7 @@ static unsigned long find_random_phys_addr(unsigned long minimum, /* Make sure minimum is aligned. */ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); - /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < boot_params->e820_entries; i++) { - process_e820_entry(&boot_params->e820_table[i], minimum, - image_size); - if (slot_area_index == MAX_SLOT_AREA) { - debug_putstr("Aborted e820 scan (slot_areas full)!\n"); - break; - } - } - + process_e820_entries(minimum, image_size); return slots_fetch_random(); } -- cgit v1.2.3-70-g09d2 From 87891b01b54210763117f0a67b022cd94de6cd13 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 9 Jul 2017 20:37:40 +0800 Subject: x86/boot/KASLR: Switch to pass struct mem_vector to process_e820_entry() This makes process_e820_entry() be able to process any kind of memory region. 
Signed-off-by: Baoquan He Acked-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: fanc.fnst@cn.fujitsu.com Cc: izumi.taku@jp.fujitsu.com Cc: matt@codeblueprint.co.uk Cc: thgarnie@google.com Link: http://lkml.kernel.org/r/1499603862-11516-3-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 1485f48aeda1..36ff9f729c43 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -479,31 +479,31 @@ static unsigned long slots_fetch_random(void) return 0; } -static void process_e820_entry(struct boot_e820_entry *entry, +static void process_e820_entry(struct mem_vector *entry, unsigned long minimum, unsigned long image_size) { struct mem_vector region, overlap; struct slot_area slot_area; unsigned long start_orig, end; - struct boot_e820_entry cur_entry; + struct mem_vector cur_entry; /* On 32-bit, ignore entries entirely above our maximum. */ - if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE) + if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE) return; /* Ignore entries entirely below our minimum. */ - if (entry->addr + entry->size < minimum) + if (entry->start + entry->size < minimum) return; /* Ignore entries above memory limit */ - end = min(entry->size + entry->addr, mem_limit); - if (entry->addr >= end) + end = min(entry->size + entry->start, mem_limit); + if (entry->start >= end) return; - cur_entry.addr = entry->addr; - cur_entry.size = end - entry->addr; + cur_entry.start = entry->start; + cur_entry.size = end - entry->start; - region.start = cur_entry.addr; + region.start = cur_entry.start; region.size = cur_entry.size; /* Give up if slot area array is full. 
*/ @@ -518,7 +518,7 @@ static void process_e820_entry(struct boot_e820_entry *entry, region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); /* Did we raise the address above this e820 region? */ - if (region.start > cur_entry.addr + cur_entry.size) + if (region.start > cur_entry.start + cur_entry.size) return; /* Reduce size by any delta from the original address. */ @@ -562,6 +562,7 @@ static void process_e820_entries(unsigned long minimum, unsigned long image_size) { int i; + struct mem_vector region; struct boot_e820_entry *entry; /* Verify potential e820 positions, appending to slots list. */ @@ -570,7 +571,9 @@ static void process_e820_entries(unsigned long minimum, /* Skip non-RAM entries. */ if (entry->type != E820_TYPE_RAM) continue; - process_e820_entry(entry, minimum, image_size); + region.start = entry->addr; + region.size = entry->size; + process_e820_entry(&region, minimum, image_size); if (slot_area_index == MAX_SLOT_AREA) { debug_putstr("Aborted e820 scan (slot_areas full)!\n"); break; -- cgit v1.2.3-70-g09d2 From 27aac20574110abfd594175a668dc58b23b2b14a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 9 Jul 2017 20:37:41 +0800 Subject: x86/boot/KASLR: Rename process_e820_entry() into process_mem_region() Now process_e820_entry() is not limited to e820 entry processing, rename it to process_mem_region(). And adjust the code comment accordingly. 
Signed-off-by: Baoquan He Acked-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: fanc.fnst@cn.fujitsu.com Cc: izumi.taku@jp.fujitsu.com Cc: matt@codeblueprint.co.uk Cc: thgarnie@google.com Link: http://lkml.kernel.org/r/1499603862-11516-4-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 36ff9f729c43..99c7194f7ea6 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -479,7 +479,7 @@ static unsigned long slots_fetch_random(void) return 0; } -static void process_e820_entry(struct mem_vector *entry, +static void process_mem_region(struct mem_vector *entry, unsigned long minimum, unsigned long image_size) { @@ -517,7 +517,7 @@ static void process_e820_entry(struct mem_vector *entry, /* Potentially raise address to meet alignment needs. */ region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); - /* Did we raise the address above this e820 region? */ + /* Did we raise the address above the passed in memory entry? */ if (region.start > cur_entry.start + cur_entry.size) return; @@ -573,7 +573,7 @@ static void process_e820_entries(unsigned long minimum, continue; region.start = entry->addr; region.size = entry->size; - process_e820_entry(&region, minimum, image_size); + process_mem_region(&region, minimum, image_size); if (slot_area_index == MAX_SLOT_AREA) { debug_putstr("Aborted e820 scan (slot_areas full)!\n"); break; -- cgit v1.2.3-70-g09d2 From 02e43c2dcd3b3cf7244f6dda65a07e8dacadaf8d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 16 Aug 2017 21:46:51 +0800 Subject: efi: Introduce efi_early_memdesc_ptr to get pointer to memmap descriptor The existing map iteration helper for_each_efi_memory_desc_in_map can only be used after the kernel initializes the EFI subsystem to set up struct efi_memory_map. 
Before that we also need to iterate map descriptors which are stored in several intermediate structures, like struct efi_boot_memmap for arch independent usage and struct efi_info for x86 arch only. Introduce efi_early_memdesc_ptr() to get pointer to a map descriptor, and replace several places where that primitive is open coded. Signed-off-by: Baoquan He [ Various improvements to the text. ] Acked-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: ard.biesheuvel@linaro.org Cc: fanc.fnst@cn.fujitsu.com Cc: izumi.taku@jp.fujitsu.com Cc: keescook@chromium.org Cc: linux-efi@vger.kernel.org Cc: n-horiguchi@ah.jp.nec.com Cc: thgarnie@google.com Link: http://lkml.kernel.org/r/20170816134651.GF21273@x1 Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 2 +- drivers/firmware/efi/libstub/efi-stub-helper.c | 4 ++-- include/linux/efi.h | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c3e869eaef0c..e007887a33b0 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -767,7 +767,7 @@ static efi_status_t setup_e820(struct boot_params *params, m |= (u64)efi->efi_memmap_hi << 32; #endif - d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size)); + d = efi_early_memdesc_ptr(m, efi->efi_memdesc_size, i); switch (d->type) { case EFI_RESERVED_TYPE: case EFI_RUNTIME_SERVICES_CODE: diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index b0184360efc6..50a9cab5a834 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -205,7 +205,7 @@ again: unsigned long m = (unsigned long)map; u64 start, end; - desc = (efi_memory_desc_t *)(m + (i * desc_size)); + desc = efi_early_memdesc_ptr(m, desc_size, i); if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; @@ -298,7 +298,7 @@ efi_status_t 
efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long m = (unsigned long)map; u64 start, end; - desc = (efi_memory_desc_t *)(m + (i * desc_size)); + desc = efi_early_memdesc_ptr(m, desc_size, i); if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; diff --git a/include/linux/efi.h b/include/linux/efi.h index 8269bcb8ccf7..a686ca9a7e5c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1020,6 +1020,28 @@ extern int efi_memattr_init(void); extern int efi_memattr_apply_permissions(struct mm_struct *mm, efi_memattr_perm_setter fn); +/* + * efi_early_memdesc_ptr - get the n-th EFI memmap descriptor + * @map: the start of efi memmap + * @desc_size: the size of space for each EFI memmap descriptor + * @n: the index of efi memmap descriptor + * + * EFI boot service provides the GetMemoryMap() function to get a copy of the + * current memory map which is an array of memory descriptors, each of + * which describes a contiguous block of memory. It also gets the size of the + * map, and the size of each descriptor, etc. + * + * Note that per section 6.2 of UEFI Spec 2.6 Errata A, the returned size of + * each descriptor might not be equal to sizeof(efi_memory_desc_t), + * since efi_memory_desc_t may be extended in the future. Thus the OS + * MUST use the returned size of the descriptor to find the start of each + * efi_memory_desc_t in the memory map array. This should only be used + * during bootup since for_each_efi_memory_desc_xxx() is available after the + * kernel initializes the EFI subsystem to set up struct efi_memory_map. 
+ */ +#define efi_early_memdesc_ptr(map, desc_size, n) \ + (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size))) + /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ -- cgit v1.2.3-70-g09d2 From c05cd79750fbe5415cda896bb99350603cc995ed Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 14 Aug 2017 22:54:24 +0800 Subject: x86/boot/KASLR: Prefer mirrored memory regions for the kernel physical address Currently KASLR will parse all e820 entries of RAM type and add all candidate positions into the slots array. After that we choose one slot randomly as the new position which the kernel will be decompressed into and run at. On systems with EFI enabled, e820 memory regions are derived from EFI memory regions by combining adjacent regions. These EFI memory regions have various attributes, and the "mirrored" attribute is one of them. Physical memory regions whose descriptors in the EFI memory map have the EFI_MEMORY_MORE_RELIABLE attribute (bit: 16) are mirrored. The address range mirroring feature of the kernel arranges such mirrored regions into normal zones and other regions into movable zones. With the mirroring feature enabled, the code and data of the kernel can only be located in the more reliable mirrored regions. However, the current KASLR code doesn't check EFI memory entries, and could choose a new kernel position in non-mirrored regions. This will break the intended functionality of the address range mirroring feature. To fix this, if EFI is detected, iterate over the EFI memory map and process only the mirrored regions when adding candidate randomization slots. If EFI is disabled or no mirrored region is found, still process the e820 memory map. 
Signed-off-by: Baoquan He Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: ard.biesheuvel@linaro.org Cc: fanc.fnst@cn.fujitsu.com Cc: izumi.taku@jp.fujitsu.com Cc: keescook@chromium.org Cc: linux-efi@vger.kernel.org Cc: matt@codeblueprint.co.uk Cc: n-horiguchi@ah.jp.nec.com Cc: thgarnie@google.com Link: http://lkml.kernel.org/r/1502722464-20614-3-git-send-email-bhe@redhat.com [ Rewrote most of the text. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 68 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 99c7194f7ea6..7de23bb279ce 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -37,7 +37,9 @@ #include #include #include +#include #include +#include /* Macros used by the included decompressor code below. */ #define STATIC @@ -558,6 +560,65 @@ static void process_mem_region(struct mem_vector *entry, } } +#ifdef CONFIG_EFI +/* + * Returns true if mirror region found (and must have been processed + * for slots adding) + */ +static bool +process_efi_entries(unsigned long minimum, unsigned long image_size) +{ + struct efi_info *e = &boot_params->efi_info; + bool efi_mirror_found = false; + struct mem_vector region; + efi_memory_desc_t *md; + unsigned long pmap; + char *signature; + u32 nr_desc; + int i; + + signature = (char *)&e->efi_loader_signature; + if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && + strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) + return false; + +#ifdef CONFIG_X86_32 + /* Can't handle data above 4GB at this time */ + if (e->efi_memmap_hi) { + warn("EFI memmap is above 4GB, can't be handled now on x86_32. 
EFI should be disabled.\n"); + return false; + } + pmap = e->efi_memmap; +#else + pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); +#endif + + nr_desc = e->efi_memmap_size / e->efi_memdesc_size; + for (i = 0; i < nr_desc; i++) { + md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); + if (md->attribute & EFI_MEMORY_MORE_RELIABLE) { + region.start = md->phys_addr; + region.size = md->num_pages << EFI_PAGE_SHIFT; + process_mem_region(&region, minimum, image_size); + efi_mirror_found = true; + + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted EFI scan (slot_areas full)!\n"); + break; + } + } + } + + return efi_mirror_found; +} +#else +static inline bool +process_efi_entries(unsigned long minimum, unsigned long image_size) +{ + return false; +} +#endif + static void process_e820_entries(unsigned long minimum, unsigned long image_size) { @@ -586,13 +647,16 @@ static unsigned long find_random_phys_addr(unsigned long minimum, { /* Check if we had too many memmaps. */ if (memmap_too_large) { - debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n"); + debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n"); return 0; } /* Make sure minimum is aligned. */ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + if (process_efi_entries(minimum, image_size)) + return slots_fetch_random(); + process_e820_entries(minimum, image_size); return slots_fetch_random(); } @@ -652,7 +716,7 @@ void choose_random_location(unsigned long input, */ min_addr = min(*output, 512UL << 20); - /* Walk e820 and find a random address. */ + /* Walk available memory entries to find a random address. 
*/ random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { warn("Physical KASLR disabled: no suitable memory region!"); -- cgit v1.2.3-70-g09d2 From 0982adc746736a313dac9cb8cc936ca51ca3741a Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 28 Aug 2017 16:30:59 +0900 Subject: x86/boot/KASLR: Work around firmware bugs by excluding EFI_BOOT_SERVICES_* and EFI_LOADER_* from KASLR's choice There's a potential bug in how we select the KASLR kernel address in the early boot code. The KASLR boot code currently chooses the kernel image's physical memory location from E820_TYPE_RAM regions by walking over all e820 entries. E820_TYPE_RAM includes EFI_BOOT_SERVICES_CODE and EFI_BOOT_SERVICES_DATA as well, so those regions can end up hosting the kernel image. According to the UEFI spec, all memory regions marked as EfiBootServicesCode and EfiBootServicesData are available as free memory after the first call to ExitBootServices(). I.e. such regions should be usable for the kernel, per spec. In real life however, we have workarounds for broken x86 firmware, where we keep such regions reserved until SetVirtualAddressMap() is done. See the following code in should_map_region(): static bool should_map_region(efi_memory_desc_t *md) { ... /* * Map boot services regions as a workaround for buggy * firmware that accesses them even when they shouldn't. * * See efi_{reserve,free}_boot_services(). */ if (md->type == EFI_BOOT_SERVICES_CODE || md->type == EFI_BOOT_SERVICES_DATA) return false; This workaround suppressed a boot crash, but potential issues still remain because nothing prevents KASLR from placing the kernel image on top of those regions. So let's make sure that EFI_BOOT_SERVICES_{CODE|DATA} regions are never chosen as kernel memory for the workaround to work fine. Furthermore, EFI_LOADER_{CODE|DATA} regions are also excluded because they can be used after ExitBootServices() as defined in EFI spec. 
As a result, we choose kernel address only from EFI_CONVENTIONAL_MEMORY which is the only memory type we know to be safely free. Signed-off-by: Naoya Horiguchi Cc: Ard Biesheuvel Cc: Baoquan He Cc: Junichi Nomura Cc: Kees Cook Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Garnier Cc: Thomas Gleixner Cc: fanc.fnst@cn.fujitsu.com Cc: izumi.taku@jp.fujitsu.com Link: http://lkml.kernel.org/r/20170828074444.GC23181@hori1.linux.bs1.fc.nec.co.jp [ Rewrote/fixed/clarified the changelog and the in code comments. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 7de23bb279ce..17818ba6906f 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -597,19 +597,41 @@ process_efi_entries(unsigned long minimum, unsigned long image_size) for (i = 0; i < nr_desc; i++) { md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); if (md->attribute & EFI_MEMORY_MORE_RELIABLE) { - region.start = md->phys_addr; - region.size = md->num_pages << EFI_PAGE_SHIFT; - process_mem_region(&region, minimum, image_size); efi_mirror_found = true; - - if (slot_area_index == MAX_SLOT_AREA) { - debug_putstr("Aborted EFI scan (slot_areas full)!\n"); - break; - } + break; } } - return efi_mirror_found; + for (i = 0; i < nr_desc; i++) { + md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); + + /* + * Here we are more conservative in picking free memory than + * the EFI spec allows: + * + * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also + * free memory and thus available to place the kernel image into, + * but in practice there's firmware where using that memory leads + * to crashes. + * + * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free. 
+ */ + if (md->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (efi_mirror_found && + !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) + continue; + + region.start = md->phys_addr; + region.size = md->num_pages << EFI_PAGE_SHIFT; + process_mem_region(&region, minimum, image_size); + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted EFI scan (slot_areas full)!\n"); + break; + } + } + return true; } #else static inline bool -- cgit v1.2.3-70-g09d2