From a86ecfa6a873e42286398b2a594cfa9e4ec10322 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 19:03:44 -0800 Subject: arch/Kconfig: fix spelling mistakes There are a few spelling mistakes in the Kconfig comments and help text. Fix these. Link: https://lkml.kernel.org/r/20201207155004.171962-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/Kconfig') diff --git a/arch/Kconfig b/arch/Kconfig index ba4e966484ab..4f654c149709 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -261,7 +261,7 @@ config ARCH_HAS_SET_DIRECT_MAP # # Select if the architecture provides the arch_dma_set_uncached symbol to -# either provide an uncached segement alias for a DMA allocation, or +# either provide an uncached segment alias for a DMA allocation, or # to remap the page tables in place. # config ARCH_HAS_DMA_SET_UNCACHED @@ -314,14 +314,14 @@ config ARCH_32BIT_OFF_T config HAVE_ASM_MODVERSIONS bool help - This symbol should be selected by an architecure if it provides + This symbol should be selected by an architecture if it provides to support the module versioning for symbols exported from assembly code. config HAVE_REGS_AND_STACK_ACCESS_API bool help - This symbol should be selected by an architecure if it supports + This symbol should be selected by an architecture if it supports the API needed to access registers and stack entries from pt_regs, declared in asm/ptrace.h For example the kprobes-based event tracer needs this API. @@ -336,7 +336,7 @@ config HAVE_RSEQ config HAVE_FUNCTION_ARG_ACCESS_API bool help - This symbol should be selected by an architecure if it supports + This symbol should be selected by an architecture if it supports the API needed to access function arguments from pt_regs, declared in asm/ptrace.h -- cgit v1.2.3-70-g09d2 From c49dd340180260c6239e453263a9a244da9a7c85 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 14 Dec 2020 19:07:30 -0800 Subject: mm: speedup mremap on 1GB or larger regions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Android needs to move large memory regions for garbage collection. The GC requires moving physical pages of multi-gigabyte heap using mremap. During this move, the application threads have to be paused for correctness. It is critical to keep this pause as short as possible to avoid jitters during user interaction. Optimize mremap for >= 1GB-sized regions by moving at the PUD/PGD level if the source and destination addresses are PUD-aligned. For CONFIG_PGTABLE_LEVELS == 3, moving at the PUD level in effect moves PGD entries, since the PUD entry is “folded back” onto the PGD entry. Add HAVE_MOVE_PUD so that architectures where moving at the PUD level isn't supported/tested can turn this off by not selecting the config. Link: https://lkml.kernel.org/r/20201014005320.2233162-4-kaleshsingh@google.com Signed-off-by: Kalesh Singh Acked-by: Kirill A. Shutemov Reported-by: kernel test robot Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brian Geffon Cc: Catalin Marinas Cc: Christian Brauner Cc: Dave Hansen Cc: Frederic Weisbecker Cc: Gavin Shan Cc: Hassan Naveed Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jia He Cc: John Hubbard Cc: Kees Cook Cc: Krzysztof Kozlowski Cc: Lokesh Gidra Cc: Mark Rutland Cc: Masahiro Yamada Cc: Masami Hiramatsu Cc: Mike Rapoport Cc: Mina Almasry Cc: Minchan Kim Cc: Peter Zijlstra (Intel) Cc: Ralph Campbell Cc: Ram Pai Cc: Sami Tolvanen Cc: Sandipan Das Cc: SeongJae Park Cc: Shuah Khan Cc: Steven Price Cc: Suren Baghdasaryan Cc: Thomas Gleixner Cc: Will Deacon Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 7 ++ mm/mremap.c | 230 ++++++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 197 insertions(+), 40 deletions(-) (limited to 'arch/Kconfig') diff --git a/arch/Kconfig b/arch/Kconfig index 4f654c149709..54a240b61f56 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -648,6 +648,13 @@ config HAVE_IRQ_TIME_ACCOUNTING Archs need to ensure they use a high enough resolution clock to support irq time accounting and then call enable_sched_clock_irqtime(). +config HAVE_MOVE_PUD + bool + help + Architectures that select this are able to move page tables at the + PUD level. If there are only 3 page table levels, the move effectively + happens at the PGD level. + config HAVE_MOVE_PMD bool help diff --git a/mm/mremap.c b/mm/mremap.c index 138abbae4f75..078f731277b6 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -30,12 +30,11 @@ #include "internal.h" -static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) +static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; - pmd_t *pmd; pgd = pgd_offset(mm, addr); if (pgd_none_or_clear_bad(pgd)) @@ -49,6 +48,18 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) if (pud_none_or_clear_bad(pud)) return NULL; + return pud; +} + +static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) +{ + pud_t *pud; + pmd_t *pmd; + + pud = get_old_pud(mm, addr); + if (!pud) + return NULL; + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) return NULL; @@ -56,19 +67,27 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) return pmd; } -static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, +static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return NULL; - pud = pud_alloc(mm, p4d, addr); + + return pud_alloc(mm, p4d, addr); +} + +static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr) +{ + pud_t *pud; + pmd_t *pmd; + + pud = alloc_new_pud(mm, vma, addr); if (!pud) return NULL; @@ -249,14 +268,148 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, return true; } +#else +static inline bool move_normal_pmd(struct vm_area_struct *vma, + unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, + pmd_t *new_pmd) +{ + return false; +} #endif +#ifdef CONFIG_HAVE_MOVE_PUD +static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, + unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) +{ + spinlock_t *old_ptl, *new_ptl; + struct mm_struct *mm = vma->vm_mm; + pud_t pud; + + /* + * The destination pud shouldn't be established, free_pgtables() + * should have released it. + */ + if (WARN_ON_ONCE(!pud_none(*new_pud))) + return false; + + /* + * We don't have to worry about the ordering of src and dst + * ptlocks because exclusive mmap_lock prevents deadlock. + */ + old_ptl = pud_lock(vma->vm_mm, old_pud); + new_ptl = pud_lockptr(mm, new_pud); + if (new_ptl != old_ptl) + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + + /* Clear the pud */ + pud = *old_pud; + pud_clear(old_pud); + + VM_BUG_ON(!pud_none(*new_pud)); + + /* Set the new pud */ + set_pud_at(mm, new_addr, new_pud, pud); + flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE); + if (new_ptl != old_ptl) + spin_unlock(new_ptl); + spin_unlock(old_ptl); + + return true; +} +#else +static inline bool move_normal_pud(struct vm_area_struct *vma, + unsigned long old_addr, unsigned long new_addr, pud_t *old_pud, + pud_t *new_pud) +{ + return false; +} +#endif + +enum pgt_entry { + NORMAL_PMD, + HPAGE_PMD, + NORMAL_PUD, +}; + +/* + * Returns an extent of the corresponding size for the pgt_entry specified if + * valid. Else returns a smaller extent bounded by the end of the source and + * destination pgt_entry. + */ +static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr, + unsigned long old_end, unsigned long new_addr) +{ + unsigned long next, extent, mask, size; + + switch (entry) { + case HPAGE_PMD: + case NORMAL_PMD: + mask = PMD_MASK; + size = PMD_SIZE; + break; + case NORMAL_PUD: + mask = PUD_MASK; + size = PUD_SIZE; + break; + default: + BUILD_BUG(); + break; + } + + next = (old_addr + size) & mask; + /* even if next overflowed, extent below will be ok */ + extent = (next > old_end) ? old_end - old_addr : next - old_addr; + next = (new_addr + size) & mask; + if (extent > next - new_addr) + extent = next - new_addr; + return extent; +} + +/* + * Attempts to speedup the move by moving entry at the level corresponding to + * pgt_entry. Returns true if the move was successful, else false. + */ +static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma, + unsigned long old_addr, unsigned long new_addr, + void *old_entry, void *new_entry, bool need_rmap_locks) +{ + bool moved = false; + + /* See comment in move_ptes() */ + if (need_rmap_locks) + take_rmap_locks(vma); + + switch (entry) { + case NORMAL_PMD: + moved = move_normal_pmd(vma, old_addr, new_addr, old_entry, + new_entry); + break; + case NORMAL_PUD: + moved = move_normal_pud(vma, old_addr, new_addr, old_entry, + new_entry); + break; + case HPAGE_PMD: + moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + move_huge_pmd(vma, old_addr, new_addr, old_entry, + new_entry); + break; + default: + WARN_ON_ONCE(1); + break; + } + + if (need_rmap_locks) + drop_rmap_locks(vma); + + return moved; +} + unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, bool need_rmap_locks) { - unsigned long extent, next, old_end; + unsigned long extent, old_end; struct mmu_notifier_range range; pmd_t *old_pmd, *new_pmd; @@ -269,53 +422,50 @@ unsigned long move_page_tables(struct vm_area_struct *vma, for (; old_addr < old_end; old_addr += extent, new_addr += extent) { cond_resched(); - next = (old_addr + PMD_SIZE) & PMD_MASK; - /* even if next overflowed, extent below will be ok */ - extent = next - old_addr; - if (extent > old_end - old_addr) - extent = old_end - old_addr; - next = (new_addr + PMD_SIZE) & PMD_MASK; - if (extent > next - new_addr) - extent = next - new_addr; + /* + * If extent is PUD-sized try to speed up the move by moving at the + * PUD level if possible. + */ + extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr); + if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) { + pud_t *old_pud, *new_pud; + + old_pud = get_old_pud(vma->vm_mm, old_addr); + if (!old_pud) + continue; + new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr); + if (!new_pud) + break; + if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr, + old_pud, new_pud, need_rmap_locks)) + continue; + } + + extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr); old_pmd = get_old_pmd(vma->vm_mm, old_addr); if (!old_pmd) continue; new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr); if (!new_pmd) break; - if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) || pmd_devmap(*old_pmd)) { - if (extent == HPAGE_PMD_SIZE) { - bool moved; - /* See comment in move_ptes() */ - if (need_rmap_locks) - take_rmap_locks(vma); - moved = move_huge_pmd(vma, old_addr, new_addr, - old_pmd, new_pmd); - if (need_rmap_locks) - drop_rmap_locks(vma); - if (moved) - continue; - } + if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) || + pmd_devmap(*old_pmd)) { + if (extent == HPAGE_PMD_SIZE && + move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr, + old_pmd, new_pmd, need_rmap_locks)) + continue; split_huge_pmd(vma, old_pmd, old_addr); if (pmd_trans_unstable(old_pmd)) continue; - } else if (extent == PMD_SIZE) { -#ifdef CONFIG_HAVE_MOVE_PMD + } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) && + extent == PMD_SIZE) { /* * If the extent is PMD-sized, try to speed the move by * moving at the PMD level if possible. */ - bool moved; - - if (need_rmap_locks) - take_rmap_locks(vma); - moved = move_normal_pmd(vma, old_addr, new_addr, - old_pmd, new_pmd); - if (need_rmap_locks) - drop_rmap_locks(vma); - if (moved) + if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr, + old_pmd, new_pmd, need_rmap_locks)) continue; -#endif } if (pte_alloc(new_vma->vm_mm, new_pmd)) -- cgit v1.2.3-70-g09d2 From 4f5b0c1789963477cc9a4d45b4b62d694665cceb Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:09:59 -0800 Subject: arm, arm64: move free_unused_memmap() to generic mm ARM and ARM64 free unused parts of the memory map just before the initialization of the page allocator. To allow holes in the memory map both architectures overload pfn_valid() and define HAVE_ARCH_PFN_VALID. Allowing holes in the memory map for FLATMEM may be useful for small machines, such as ARC and m68k and will enable those architectures to cease using DISCONTIGMEM and still support more than one memory bank. Move the functions that free unused memory map to generic mm and enable them in case HAVE_ARCH_PFN_VALID=y. Link: https://lkml.kernel.org/r/20201101170454.9567-10-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Catalin Marinas [arm64] Cc: Alexey Dobriyan Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Matt Turner Cc: Meelis Roos Cc: Michael Schmitz Cc: Russell King Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 ++ arch/arm/Kconfig | 4 +-- arch/arm/mm/init.c | 78 -------------------------------------------------- arch/arm64/Kconfig | 4 +-- arch/arm64/mm/init.c | 68 -------------------------------------------- mm/memblock.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 85 insertions(+), 152 deletions(-) (limited to 'arch/Kconfig') diff --git a/arch/Kconfig b/arch/Kconfig index 54a240b61f56..8d5efff59cd8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1044,6 +1044,9 @@ config ARCH_WANT_LD_ORPHAN_WARN by the linker, since the locations of such sections can change between linker versions. +config HAVE_ARCH_PFN_VALID + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 353c3979a2d5..03602e8fe16d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -69,6 +69,7 @@ config ARM select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_PFN_VALID select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT select HAVE_ARCH_THREAD_STRUCT_WHITELIST @@ -1489,9 +1490,6 @@ config ARCH_SPARSEMEM_ENABLE bool select SPARSEMEM_STATIC if SPARSEMEM -config HAVE_ARCH_PFN_VALID - def_bool y - config HIGHMEM bool "High Memory Support" depends on MMU diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c23dbf8bebee..db623d7c30de 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -267,83 +267,6 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static inline void __init -free_memmap(unsigned long start_pfn, unsigned long end_pfn) -{ - struct page *start_pg, *end_pg; - phys_addr_t pg, pgend; - - /* - * Convert start_pfn/end_pfn to a struct page pointer. - */ - start_pg = pfn_to_page(start_pfn - 1) + 1; - end_pg = pfn_to_page(end_pfn - 1) + 1; - - /* - * Convert to physical addresses, and - * round start upwards and end downwards. - */ - pg = PAGE_ALIGN(__pa(start_pg)); - pgend = __pa(end_pg) & PAGE_MASK; - - /* - * If there are free pages between these, - * free the section of the memmap array. - */ - if (pg < pgend) - memblock_free_early(pg, pgend - pg); -} - -/* - * The mem_map array can get very big. Free the unused area of the memory map. - */ -static void __init free_unused_memmap(void) -{ - unsigned long start, end, prev_end = 0; - int i; - - /* - * This relies on each bank being in address order. - * The banks are sorted previously in bootmem_init(). - */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { -#ifdef CONFIG_SPARSEMEM - /* - * Take care not to free memmap entries that don't exist - * due to SPARSEMEM sections which aren't present. - */ - start = min(start, - ALIGN(prev_end, PAGES_PER_SECTION)); -#else - /* - * Align down here since the VM subsystem insists that the - * memmap entries are valid from the bank start aligned to - * MAX_ORDER_NR_PAGES. - */ - start = round_down(start, MAX_ORDER_NR_PAGES); -#endif - /* - * If we had a previous bank, and there is a space - * between the current bank and the previous, free it. - */ - if (prev_end && prev_end < start) - free_memmap(prev_end, start); - - /* - * Align up here since the VM subsystem insists that the - * memmap entries are valid from the bank end aligned to - * MAX_ORDER_NR_PAGES. - */ - prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); - } - -#ifdef CONFIG_SPARSEMEM - if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) - free_memmap(prev_end, - ALIGN(prev_end, PAGES_PER_SECTION)); -#endif -} - static void __init free_highpages(void) { #ifdef CONFIG_HIGHMEM @@ -385,7 +308,6 @@ void __init mem_init(void) set_max_mapnr(pfn_to_page(max_pfn) - mem_map); /* this will put all unused low memory onto the freelists */ - free_unused_memmap(); memblock_free_all(); #ifdef CONFIG_SA1111 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 15bb857e42e1..6f0751631ef1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -140,6 +140,7 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PFN_VALID select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_STACKLEAK @@ -1043,9 +1044,6 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_FLATMEM_ENABLE def_bool !NUMA -config HAVE_ARCH_PFN_VALID - def_bool y - config HW_PERF_EVENTS def_bool y depends on ARM_PMU diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 095540667f0f..3d8328277bec 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -430,71 +430,6 @@ void __init bootmem_init(void) memblock_dump_all(); } -#ifndef CONFIG_SPARSEMEM_VMEMMAP -static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) -{ - struct page *start_pg, *end_pg; - unsigned long pg, pgend; - - /* - * Convert start_pfn/end_pfn to a struct page pointer. - */ - start_pg = pfn_to_page(start_pfn - 1) + 1; - end_pg = pfn_to_page(end_pfn - 1) + 1; - - /* - * Convert to physical addresses, and round start upwards and end - * downwards. - */ - pg = (unsigned long)PAGE_ALIGN(__pa(start_pg)); - pgend = (unsigned long)__pa(end_pg) & PAGE_MASK; - - /* - * If there are free pages between these, free the section of the - * memmap array. - */ - if (pg < pgend) - memblock_free(pg, pgend - pg); -} - -/* - * The mem_map array can get very big. Free the unused area of the memory map. - */ -static void __init free_unused_memmap(void) -{ - unsigned long start, end, prev_end = 0; - int i; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { -#ifdef CONFIG_SPARSEMEM - /* - * Take care not to free memmap entries that don't exist due - * to SPARSEMEM sections which aren't present. - */ - start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); -#endif - /* - * If we had a previous bank, and there is a space between the - * current bank and the previous, free it. - */ - if (prev_end && prev_end < start) - free_memmap(prev_end, start); - - /* - * Align up here since the VM subsystem insists that the - * memmap entries are valid from the bank end aligned to - * MAX_ORDER_NR_PAGES. - */ - prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); - } - -#ifdef CONFIG_SPARSEMEM - if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) - free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); -#endif -} -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ - /* * mem_init() marks the free areas in the mem_map and tells us how much memory * is free. This is done after various parts of the system have claimed their @@ -510,9 +445,6 @@ void __init mem_init(void) set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); -#ifndef CONFIG_SPARSEMEM_VMEMMAP - free_unused_memmap(); -#endif /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/mm/memblock.c b/mm/memblock.c index b68ee86788af..049df4163a97 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1926,6 +1926,85 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); +static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) +{ + struct page *start_pg, *end_pg; + phys_addr_t pg, pgend; + + /* + * Convert start_pfn/end_pfn to a struct page pointer. + */ + start_pg = pfn_to_page(start_pfn - 1) + 1; + end_pg = pfn_to_page(end_pfn - 1) + 1; + + /* + * Convert to physical addresses, and round start upwards and end + * downwards. + */ + pg = PAGE_ALIGN(__pa(start_pg)); + pgend = __pa(end_pg) & PAGE_MASK; + + /* + * If there are free pages between these, free the section of the + * memmap array. + */ + if (pg < pgend) + memblock_free(pg, pgend - pg); +} + +/* + * The mem_map array can get very big. Free the unused area of the memory map. + */ +static void __init free_unused_memmap(void) +{ + unsigned long start, end, prev_end = 0; + int i; + + if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) || + IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) + return; + + /* + * This relies on each bank being in address order. + * The banks are sorted previously in bootmem_init(). + */ + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist + * due to SPARSEMEM sections which aren't present. + */ + start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); +#else + /* + * Align down here since the VM subsystem insists that the + * memmap entries are valid from the bank start aligned to + * MAX_ORDER_NR_PAGES. + */ + start = round_down(start, MAX_ORDER_NR_PAGES); +#endif + + /* + * If we had a previous bank, and there is a space + * between the current bank and the previous, free it. + */ + if (prev_end && prev_end < start) + free_memmap(prev_end, start); + + /* + * Align up here since the VM subsystem insists that the + * memmap entries are valid from the bank end aligned to + * MAX_ORDER_NR_PAGES. + */ + prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); + } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) + free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); +#endif +} + static void __init __free_pages_memory(unsigned long start, unsigned long end) { int order; @@ -2012,6 +2091,7 @@ unsigned long __init memblock_free_all(void) { unsigned long pages; + free_unused_memmap(); reset_all_zones_managed_pages(); pages = free_low_memory_core_early(); -- cgit v1.2.3-70-g09d2 From 5d6ad668f31625c6aa9ed8dc3bdb29561d2b1144 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:30 -0800 Subject: arch, mm: restore dependency of __kernel_map_pages() on DEBUG_PAGEALLOC The design of DEBUG_PAGEALLOC presumes that __kernel_map_pages() must never fail. With this assumption is wouldn't be safe to allow general usage of this function. Moreover, some architectures that implement __kernel_map_pages() have this function guarded by #ifdef DEBUG_PAGEALLOC and some refuse to map/unmap pages when page allocation debugging is disabled at runtime. As all the users of __kernel_map_pages() were converted to use debug_pagealloc_map_pages() it is safe to make it available only when DEBUG_PAGEALLOC is set. Link: https://lkml.kernel.org/r/20201109192128.960-4-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Acked-by: Kirill A. Shutemov Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 +++ arch/arm64/Kconfig | 4 +--- arch/arm64/mm/pageattr.c | 8 ++++++-- arch/powerpc/Kconfig | 5 +---- arch/riscv/Kconfig | 4 +--- arch/riscv/include/asm/pgtable.h | 2 -- arch/riscv/mm/pageattr.c | 2 ++ arch/s390/Kconfig | 4 +--- arch/sparc/Kconfig | 4 +--- arch/x86/Kconfig | 4 +--- arch/x86/mm/pat/set_memory.c | 2 ++ include/linux/mm.h | 10 +++++++--- 12 files changed, 26 insertions(+), 26 deletions(-) (limited to 'arch/Kconfig') diff --git a/arch/Kconfig b/arch/Kconfig index 8d5efff59cd8..cd4172a80123 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1047,6 +1047,9 @@ config ARCH_WANT_LD_ORPHAN_WARN config HAVE_ARCH_PFN_VALID bool +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6f0751631ef1..f2dee598a921 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -71,6 +71,7 @@ config ARM64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_USE_SYM_ANNOTATIONS + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK select ARCH_SUPPORTS_ATOMIC_RMW @@ -1028,9 +1029,6 @@ config HOLES_IN_ZONE source "kernel/Kconfig.hz" -config ARCH_SUPPORTS_DEBUG_PAGEALLOC - def_bool y - config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 1b94f5b82654..439325532be1 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -155,7 +155,7 @@ int set_direct_map_invalid_noflush(struct page *page) .clear_mask = __pgprot(PTE_VALID), }; - if (!rodata_full) + if (!debug_pagealloc_enabled() && !rodata_full) return 0; return apply_to_page_range(&init_mm, @@ -170,7 +170,7 @@ int set_direct_map_default_noflush(struct page *page) .clear_mask = __pgprot(PTE_RDONLY), }; - if (!rodata_full) + if (!debug_pagealloc_enabled() && !rodata_full) return 0; return apply_to_page_range(&init_mm, @@ -178,6 +178,7 @@ int set_direct_map_default_noflush(struct page *page) PAGE_SIZE, change_page_range, &data); } +#ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { if (!debug_pagealloc_enabled() && !rodata_full) @@ -186,6 +187,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) set_memory_valid((unsigned long)page_address(page), numpages, enable); } +#ifdef CONFIG_HIBERNATION /* * This function is used to determine if a linear map page has been marked as * not-valid. Walk the page table and check the PTE_VALID bit. This is based @@ -232,3 +234,5 @@ bool kernel_page_present(struct page *page) ptep = pte_offset_kernel(pmdp, addr); return pte_valid(READ_ONCE(*ptep)); } +#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5181872f9452..8e8f46bbd39c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -146,6 +146,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS @@ -356,10 +357,6 @@ config PPC_OF_PLATFORM_PCI depends on PCI depends on PPC64 # not supported on 32 bits yet -config ARCH_SUPPORTS_DEBUG_PAGEALLOC - depends on PPC32 || PPC_BOOK3S_64 - def_bool y - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 44377fd7860e..9283c6f9ae2a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -14,6 +14,7 @@ config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU @@ -153,9 +154,6 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_WANT_GENERAL_HUGETLB def_bool y -config ARCH_SUPPORTS_DEBUG_PAGEALLOC - def_bool y - config SYS_SUPPORTS_HUGETLBFS depends on MMU def_bool y diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 183f1f4b2ae6..41a72861987c 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -461,8 +461,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define VMALLOC_START 0 #define VMALLOC_END TASK_SIZE -static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {} - #endif /* !CONFIG_MMU */ #define kern_addr_valid(addr) (1) /* FIXME */ diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 19fecb362d81..321b09d2e2ea 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -184,6 +184,7 @@ int set_direct_map_default_noflush(struct page *page) return ret; } +#ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { if (!debug_pagealloc_enabled()) @@ -196,3 +197,4 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) __set_memory((unsigned long)page_address(page), numpages, __pgprot(0), __pgprot(_PAGE_PRESENT)); } +#endif diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a2a12be04c9..991a850a6c0b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -35,9 +35,6 @@ config GENERIC_LOCKBREAK config PGSTE def_bool y if KVM -config ARCH_SUPPORTS_DEBUG_PAGEALLOC - def_bool y - config AUDIT_ARCH def_bool y @@ -106,6 +103,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index a6ca135442f9..2c729b8d097a 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -88,6 +88,7 @@ config SPARC64 select HAVE_C_RECORDMCOUNT select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select HAVE_NMI select HAVE_REGS_AND_STACK_ACCESS_API select ARCH_USE_QUEUED_RWLOCKS @@ -148,9 +149,6 @@ config GENERIC_ISA_DMA bool default y if SPARC32 -config ARCH_SUPPORTS_DEBUG_PAGEALLOC - def_bool y if SPARC64 - config PGTABLE_LEVELS default 4 if 64BIT default 3 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46393177ce73..384c91b057c5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -91,6 +91,7 @@ config X86 select ARCH_STACKWALK select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS @@ -331,9 +332,6 @@ config ZONE_DMA32 config AUDIT_ARCH def_bool y if X86_64 -config ARCH_SUPPORTS_DEBUG_PAGEALLOC - def_bool y - config KASAN_SHADOW_OFFSET hex depends on KASAN diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 40baa90e74f4..bc9be96b777f 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2194,6 +2194,7 @@ int set_direct_map_default_noflush(struct page *page) return __set_pages_p(page, 1); } +#ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) @@ -2239,6 +2240,7 @@ bool kernel_page_present(struct page *page) return (pte_val(*pte) & _PAGE_PRESENT); } #endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b5df31387b5..7a67bdf3df5e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2931,7 +2931,11 @@ static inline bool debug_pagealloc_enabled_static(void) return static_branch_unlikely(&_debug_pagealloc_enabled); } -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) +#ifdef CONFIG_DEBUG_PAGEALLOC +/* + * To support DEBUG_PAGEALLOC architecture must ensure that + * __kernel_map_pages() never fails + */ extern void __kernel_map_pages(struct page *page, int numpages, int enable); static inline void debug_pagealloc_map_pages(struct page *page, int numpages) @@ -2949,13 +2953,13 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ -#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ +#else /* CONFIG_DEBUG_PAGEALLOC */ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); -- cgit v1.2.3-70-g09d2