From 4ff5308744f5858e4e49e56a0445e2f8b73e47e0 Mon Sep 17 00:00:00 2001
From: Thomas Garnier
Date: Wed, 15 Jun 2016 12:05:45 -0700
Subject: x86/mm: Do not reference phys addr beyond kernel

The new physical address randomized KASLR implementation can cause
the kernel to be aligned close to the end of physical memory. In this
case, _brk_end aligned to PMD will go beyond what is expected safe and
hit the assert in __phys_addr_symbol():

	VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);

Instead, perform an inclusive range check to avoid incorrectly
triggering the assert:

	kernel BUG at arch/x86/mm/physaddr.c:38!
	invalid opcode: 0000 [#1] SMP
	...
	RIP: 0010:[] __phys_addr_symbol+0x41/0x50
	...
	Call Trace:
	 [] cpa_process_alias+0xa9/0x210
	 [] ? do_raw_spin_unlock+0xc1/0x100
	 [] __change_page_attr_set_clr+0x8cf/0xbd0
	 [] ? vm_unmap_aliases+0x7d/0x210
	 [] change_page_attr_set_clr+0x18c/0x4e0
	 [] set_memory_4k+0x2c/0x40
	 [] check_bugs+0x28/0x2a
	 [] start_kernel+0x49d/0x4b9
	 [] ? early_idt_handler_array+0x120/0x120
	 [] x86_64_start_reservations+0x29/0x2b
	 [] x86_64_start_kernel+0x143/0x152

Signed-off-by: Thomas Garnier
Signed-off-by: Kees Cook
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Chris Wilson
Cc: Christian Borntraeger
Cc: Denys Vlasenko
Cc: Dexuan Cui
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Matt Fleming
Cc: Peter Zijlstra
Cc: Sai Praneeth
Cc: Thomas Gleixner
Cc: Toshi Kani
Link: http://lkml.kernel.org/r/20160615190545.GA26071@www.outflux.net
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/x86/mm/pageattr.c')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7a1f7bbf4105..379b5111ac6b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void)
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
+	/* Do not reference physical address outside the kernel. */
+	return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
 }
 
 #endif
@@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end)
 	return addr >= start && addr < end;
 }
 
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr <= end;
+}
+
 /*
  * Flushing functions
  */
@@ -1316,7 +1323,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * to touch the high mapped kernel as well:
 	 */
 	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
-	    within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+	    within_inclusive(cpa->pfn, highmap_start_pfn(),
+			     highmap_end_pfn())) {
 		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
 					       __START_KERNEL_map - phys_base;
 		alias_cpa = *cpa;
--
cgit v1.2.3-70-g09d2
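To see the off-by-one this patch avoids: the old code handed
roundup(_brk_end, PMD_SIZE) to __pa_symbol(), and that rounded-up address
is the first byte *past* the last kernel mapping, so the translation
itself could trip VIRTUAL_BUG_ON() when the kernel sits near the end of
physical memory. The new code translates the last byte inside the mapping
and compensates with an inclusive PFN comparison. A standalone user-space
sketch of the arithmetic (the constants and the roundup() helper below
are simplified stand-ins, not the kernel's):

#include <stdio.h>

#define PMD_SIZE   (2UL << 20)  /* 2 MiB, as on x86-64 */
#define PAGE_SHIFT 12

/* Simplified stand-in for the kernel's roundup(). */
static unsigned long roundup(unsigned long x, unsigned long align)
{
        return (x + align - 1) & ~(align - 1);
}

int main(void)
{
        /* Hypothetical _brk_end just below a PMD boundary. */
        unsigned long brk_end = 0x3ffff0UL;

        /* Old: first byte past the mapping -- may lie beyond the image. */
        unsigned long old_addr = roundup(brk_end, PMD_SIZE);

        /* New: last byte inside the mapping, paired with a <= check. */
        unsigned long new_addr = roundup(brk_end, PMD_SIZE) - 1;

        printf("old (exclusive) pfn: %#lx\n", old_addr >> PAGE_SHIFT);
        printf("new (inclusive) pfn: %#lx\n", new_addr >> PAGE_SHIFT);
        return 0;
}

The end PFN drops by one, which is why the comparison in
cpa_process_alias() has to become within_inclusive().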
From dcb32d9913b7ed527b135a7e221f8d14b67bb952 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Thu, 7 Jul 2016 17:19:15 -0700
Subject: x86/mm: Use pte_none() to test for empty PTE

The page table manipulation code seems to have grown a couple of
sites that are looking for empty PTEs.  Just in case one of these
entries got a stray bit set, use pte_none() instead of checking
for a zero pte_val().

The use of pte_same() makes me a bit nervous.  If we were doing a
pte_same() check against two cleared entries and one of them had
a stray bit set, it might fail the pte_same() check.  But, I
don't think we ever _do_ pte_same() for cleared entries.  It is
almost entirely used for checking for races in fault-in paths.

Signed-off-by: Dave Hansen
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Dave Hansen
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Luis R. Rodriguez
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Toshi Kani
Cc: dave.hansen@intel.com
Cc: linux-mm@kvack.org
Cc: mhocko@suse.com
Link: http://lkml.kernel.org/r/20160708001915.813703D9@viggo.jf.intel.com
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/init_64.c    | 12 ++++++------
 arch/x86/mm/pageattr.c   |  2 +-
 arch/x86/mm/pgtable_32.c |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/x86/mm/pageattr.c')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bce2e5d9edd4..bb88fbc0a288 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -354,7 +354,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 		 * pagetable pages as RO. So assume someone who pre-setup
 		 * these mappings are more intelligent.
 		 */
-		if (pte_val(*pte)) {
+		if (!pte_none(*pte)) {
 			if (!after_bootmem)
 				pages++;
 			continue;
@@ -396,7 +396,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			continue;
 		}
 
-		if (pmd_val(*pmd)) {
+		if (!pmd_none(*pmd)) {
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
 				pte = (pte_t *)pmd_page_vaddr(*pmd);
@@ -470,7 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			continue;
 		}
 
-		if (pud_val(*pud)) {
+		if (!pud_none(*pud)) {
 			if (!pud_large(*pud)) {
 				pmd = pmd_offset(pud, 0);
 				last_map_addr = phys_pmd_init(pmd, addr, end,
@@ -673,7 +673,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
 
 	for (i = 0; i < PTRS_PER_PTE; i++) {
 		pte = pte_start + i;
-		if (pte_val(*pte))
+		if (!pte_none(*pte))
 			return;
 	}
 
@@ -691,7 +691,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd = pmd_start + i;
-		if (pmd_val(*pmd))
+		if (!pmd_none(*pmd))
 			return;
 	}
 
@@ -710,7 +710,7 @@ static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		pud = pud_start + i;
-		if (pud_val(*pud))
+		if (!pud_none(*pud))
 			return false;
 	}
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7a1f7bbf4105..75142159b0a5 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1185,7 +1185,7 @@ repeat:
 		return __cpa_process_fault(cpa, address, primary);
 
 	old_pte = *kpte;
-	if (!pte_val(old_pte))
+	if (pte_none(old_pte))
 		return __cpa_process_fault(cpa, address, primary);
 
 	if (level == PG_LEVEL_4K) {
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 75cc0978d45d..e67ae0e6c59d 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -47,7 +47,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 		return;
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (pte_val(pteval))
+	if (!pte_none(pteval))
 		set_pte_at(&init_mm, vaddr, pte, pteval);
 	else
 		pte_clear(&init_mm, vaddr, pte);
--
cgit v1.2.3-70-g09d2
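On x86 at the time, pte_none() was effectively the same zero test, so
this patch changes intent rather than behavior: every "is this entry
empty?" question now goes through one helper that could later be taught
to ignore stray bits. A toy user-space model of the distinction (pte_t,
the flag value, and both helpers below are simplified stand-ins for the
kernel's):

#include <stdio.h>

typedef struct { unsigned long pte; } pte_t;

#define _PAGE_DIRTY 0x040UL     /* stands in for any stray bit */

static unsigned long pte_val(pte_t p) { return p.pte; }
static int pte_none(pte_t p)          { return pte_val(p) == 0; }

int main(void)
{
        pte_t cleared = { 0 };
        pte_t stray   = { _PAGE_DIRTY };  /* not present, not all-zero */

        /* Both tests agree today... */
        printf("cleared: pte_val=%#lx  !pte_val=%d  pte_none=%d\n",
               pte_val(cleared), !pte_val(cleared), pte_none(cleared));

        /*
         * ...but an open-coded "if (pte_val(*pte))" treats the stray
         * entry as populated, and there is no single place to fix that.
         * With pte_none(), one redefinition can mask off stray bits.
         */
        printf("stray:   pte_val=%#lx  !pte_val=%d  pte_none=%d\n",
               pte_val(stray), !pte_val(stray), pte_none(stray));
        return 0;
}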
From 360cb4d15567a7eca07a5f3ade6de308bbfb4e70 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Thu, 14 Jul 2016 13:22:50 -0700
Subject: x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's
 populated

This avoids pointless races in which another CPU or task might see a
partially populated global PGD entry.  These races should normally be
harmless, but, if another CPU propagates the entry via vmalloc_fault()
and then populate_pgd() fails (due to memory allocation failure, for
example), this prevents a use-after-free of the PGD entry.

Signed-off-by: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/bf99df27eac6835f687005364bd1fbd89130946c.1468527351.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86/mm/pageattr.c')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 75142159b0a5..26aa487ae4ef 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1104,8 +1104,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
 		if (!pud)
 			return -1;
-
-		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
 	}
 
 	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
@@ -1113,11 +1111,16 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 
 	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
 	if (ret < 0) {
-		unmap_pgd_range(cpa->pgd, addr,
+		if (pud)
+			free_page((unsigned long)pud);
+		unmap_pud_range(pgd_entry, addr,
 			        addr + (cpa->numpages << PAGE_SHIFT));
 		return ret;
 	}
 
+	if (pud)
+		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+
 	cpa->numpages = ret;
 	return 0;
 }
--
cgit v1.2.3-70-g09d2
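The fix is an instance of the initialize-then-publish pattern: do not
install a pointer that other CPUs may start dereferencing until the
structure behind it is fully built. A schematic, compilable sketch of the
before/after control flow (all names are stand-ins; this is not the
kernel's code, and the follow-up commits below revise the ordering yet
again):

#include <stdlib.h>

static void *alloc_table(void)  { return calloc(512, 8); }
static int populate(void *pud)  { (void)pud; return -1; /* pretend it failed */ }
static void *published;         /* stands in for the live PGD slot */

/*
 * Before: publish first.  Another CPU may copy 'published' (the way
 * vmalloc_fault() copies PGD entries) before populate() fails...
 */
static int populate_pgd_old(void)
{
        void *pud = alloc_table();
        if (!pud)
                return -1;
        published = pud;                /* visible to everyone */
        if (populate(pud) < 0) {
                published = NULL;
                free(pud);              /* ...and now uses freed memory */
                return -1;
        }
        return 0;
}

/* After: build everything, then publish exactly once. */
static int populate_pgd_new(void)
{
        void *pud = alloc_table();
        if (!pud)
                return -1;
        if (populate(pud) < 0) {
                free(pud);              /* never was visible */
                return -1;
        }
        published = pud;
        return 0;
}

int main(void)
{
        populate_pgd_old();
        populate_pgd_new();
        return 0;
}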
From d92fc69ccac4c0a20679fdbdc81b2010685a6f33 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Thu, 14 Jul 2016 13:22:51 -0700
Subject: x86/mm: Remove kernel_unmap_pages_in_pgd() and
 efi_cleanup_page_tables()

kernel_unmap_pages_in_pgd() is dangerous: if a PGD entry in
init_mm.pgd were to be cleared, callers would need to ensure that
the pgd entry hadn't been propagated to any other pgd.

Its only caller was efi_cleanup_page_tables(), and that, in turn,
was unused, so just delete both functions.  This leaves a couple of
other helpers unused, so delete them, too.

Signed-off-by: Andy Lutomirski
Reviewed-by: Matt Fleming
Acked-by: Borislav Petkov
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/77ff20fdde3b75cd393be5559ad8218870520248.1468527351.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/efi.h           |  1 -
 arch/x86/include/asm/pgtable_types.h |  2 --
 arch/x86/mm/pageattr.c               | 28 ----------------------------
 arch/x86/platform/efi/efi.c          |  2 --
 arch/x86/platform/efi/efi_32.c       |  3 ---
 arch/x86/platform/efi/efi_64.c       |  5 -----
 6 files changed, 41 deletions(-)

(limited to 'arch/x86/mm/pageattr.c')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 78d1e7467eae..45ea38df86d4 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -125,7 +125,6 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
 extern int __init efi_alloc_page_tables(void);
 extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_update_mappings(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d14d0a55322a..f1218f512f62 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -481,8 +481,6 @@ extern pmd_t *lookup_pmd_address(unsigned long address);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
 				   unsigned numpages, unsigned long page_flags);
-void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
-			       unsigned numpages);
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 26aa487ae4ef..26c93c6e04a0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -746,18 +746,6 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
 	return true;
 }
 
-static bool try_to_free_pud_page(pud_t *pud)
-{
-	int i;
-
-	for (i = 0; i < PTRS_PER_PUD; i++)
-		if (!pud_none(pud[i]))
-			return false;
-
-	free_page((unsigned long)pud);
-	return true;
-}
-
 static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
 {
 	pte_t *pte = pte_offset_kernel(pmd, start);
@@ -871,16 +859,6 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
 	 */
 }
 
-static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
-{
-	pgd_t *pgd_entry = root + pgd_index(addr);
-
-	unmap_pud_range(pgd_entry, addr, end);
-
-	if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
-		pgd_clear(pgd_entry);
-}
-
 static int alloc_pte_page(pmd_t *pmd)
 {
 	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
@@ -1994,12 +1972,6 @@ out:
 	return retval;
 }
 
-void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
-			       unsigned numpages)
-{
-	unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
-}
-
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f93545e7dc54..62986e5fbdba 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -978,8 +978,6 @@ static void __init __efi_enter_virtual_mode(void)
 	 * EFI mixed mode we need all of memory to be accessible when
 	 * we pass parameters to the EFI runtime services in the
 	 * thunking code.
-	 *
-	 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
 	 */
 	free_pages((unsigned long)new_memmap, pg_shift);
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 338402b91d2e..cef39b097649 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -49,9 +49,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
 	return 0;
 }
-void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
-{
-}
 
 void __init efi_map_region(efi_memory_desc_t *md)
 {
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index b226b3f497f1..d288dcea1ffe 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -285,11 +285,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	return 0;
 }
 
-void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
-{
-	kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
-}
-
 static void __init __map_region(efi_memory_desc_t *md, u64 va)
 {
 	unsigned long flags = _PAGE_RW;
--
cgit v1.2.3-70-g09d2
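The hazard described above comes from lazy propagation: kernel-space PGD
entries are copied into per-process page tables on demand (as
vmalloc_fault() does), so clearing and freeing only init_mm's copy
strands every copy. A toy model of that bug class (plain arrays stand in
for page tables; not kernel code):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        /* A "PGD" here is just an array of pointers to lower tables. */
        unsigned long *init_pgd[4] = { 0 };
        unsigned long *task_pgd[4] = { 0 };

        unsigned long *pud = calloc(512, sizeof(*pud));
        if (!pud)
                return 1;

        init_pgd[2] = pud;              /* init_mm maps a kernel region */
        task_pgd[2] = init_pgd[2];      /* lazily copied into a process */

        /*
         * A kernel_unmap_pages_in_pgd()-style teardown that cleared
         * init_mm's entry and freed the table would have to chase down
         * every copy -- which nothing did:
         */
        init_pgd[2] = NULL;
        free(pud);

        printf("task_pgd[2] still points at the freed table: %p\n",
               (void *)task_pgd[2]);
        return 0;
}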
From 530dd8d4b9daf77e3e5d145a26210d91ced954c7 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Fri, 22 Jul 2016 21:58:08 -0700
Subject: x86/mm/cpa: Fix populate_pgd(): Stop trying to deallocate failed
 PUDs

Valdis Kletnieks bisected a boot failure back to this recent commit:

  360cb4d15567 ("x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated")

I broke the case where a PUD table got allocated -- populate_pud()
would wander off a pgd_none entry and get lost.  I'm not sure how
this survived my testing.

Fix the original issue in a much simpler way.  The problem was that,
if we allocated a PUD table, failed to populate it, and freed it,
another CPU could potentially keep using the PGD entry we installed
(either by copying it via vmalloc_fault or by speculatively caching
it).  There's a straightforward fix: simply leave the top-level entry
in place if this happens.  This can't waste any significant amount of
memory -- there are at most 256 entries like this systemwide and, as
a practical matter, if we hit this failure path repeatedly, we're
likely to reuse the same page anyway.

For context, this is a reversion with this hunk added in:

	 if (ret < 0) {
	+	/*
	+	 * Leave the PUD page in place in case some other CPU or thread
	+	 * already found it, but remove any useless entries we just
	+	 * added to it.
	+	 */
	-	unmap_pgd_range(cpa->pgd, addr,
	+	unmap_pud_range(pgd_entry, addr,
			        addr + (cpa->numpages << PAGE_SHIFT));
		return ret;
	 }

This effectively open-codes what the now-deleted unmap_pgd_range()
function used to do except that unmap_pgd_range() used to try to
free the page as well.

Reported-by: Valdis Kletnieks
Signed-off-by: Andy Lutomirski
Cc: Andrew Morton
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Luis R. Rodriguez
Cc: Mike Krinkin
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Toshi Kani
Link: http://lkml.kernel.org/r/21cbc2822aa18aa812c0215f4231dbf5f65afa7f.1469249789.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/x86/mm/pageattr.c')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 26c93c6e04a0..2bc6ea153f76 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1082,6 +1082,8 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
 		if (!pud)
 			return -1;
+
+		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
 	}
 
 	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
@@ -1089,16 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 
 	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
 	if (ret < 0) {
-		if (pud)
-			free_page((unsigned long)pud);
 		unmap_pud_range(pgd_entry, addr,
 			        addr + (cpa->numpages << PAGE_SHIFT));
 		return ret;
 	}
 
-	if (pud)
-		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
-
 	cpa->numpages = ret;
 	return 0;
 }
--
cgit v1.2.3-70-g09d2
From 55920d31f1e3fea06702c74271dd56c4fc9b70ca Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Sat, 23 Jul 2016 09:59:28 -0700
Subject: x86/mm/cpa: Add missing comment in populate_pdg()

In commit:

  21cbc2822aa1 ("x86/mm/cpa: Unbreak populate_pgd(): stop trying to deallocate failed PUDs")

I intended to add this comment, but I failed at using git.

Signed-off-by: Andy Lutomirski
Cc: Borislav Petkov
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/242baf8612394f4e31216f96d13c4d2e9b90d1b7.1469293159.git.luto@kernel.org
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86/mm/pageattr.c')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2bc6ea153f76..47870a534877 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1091,6 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 
 	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
 	if (ret < 0) {
+		/*
+		 * Leave the PUD page in place in case some other CPU or thread
+		 * already found it, but remove any useless entries we just
+		 * added to it.
+		 */
 		unmap_pud_range(pgd_entry, addr,
 			        addr + (cpa->numpages << PAGE_SHIFT));
 		return ret;
--
cgit v1.2.3-70-g09d2
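Pieced together from the hunks in the last three patches, the allocation
and error handling in populate_pgd() ends this series looking roughly
like the excerpt below (surrounding context approximated from the diff
context lines; the diffs above are authoritative):

	pgd_t *pgd_entry = cpa->pgd + pgd_index(addr);
	pud_t *pud = NULL;

	if (pgd_none(*pgd_entry)) {
		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
		if (!pud)
			return -1;

		/* Publish before populate_pud() walks through it. */
		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
	}

	...

	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
	if (ret < 0) {
		/*
		 * Leave the PUD page in place in case some other CPU or thread
		 * already found it, but remove any useless entries we just
		 * added to it.
		 */
		unmap_pud_range(pgd_entry, addr,
				addr + (cpa->numpages << PAGE_SHIFT));
		return ret;
	}

	cpa->numpages = ret;
	return 0;

The net effect of the series: the PGD entry is installed as soon as the
PUD table exists (so populate_pud() can walk it), and on failure the
entry and its PUD page are deliberately left in place, costing at most
one page per top-level slot.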