From e180377f1ae48b3cbc559c9875d9b038f7f000c6 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 12 Dec 2012 13:50:59 -0800
Subject: thp: change split_huge_page_pmd() interface

Pass vma instead of mm and add address parameter.

In most cases we already have the vma on the stack. We provide
split_huge_page_pmd_mm() for the few cases where we have the mm but not
the vma.

This change is preparation for the huge zero pmd splitting
implementation.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/vm86_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5c9687b1bde6..1dfe69cc78a8 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 	if (pud_none_or_clear_bad(pud))
 		goto out;
 	pmd = pmd_offset(pud, 0xA0000);
-	split_huge_page_pmd(mm, pmd);
+	split_huge_page_pmd_mm(mm, 0xA0000, pmd);
 	if (pmd_none_or_clear_bad(pmd))
 		goto out;
 	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
--
cgit v1.2.3-70-g09d2
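
For context, the mm-only wrapper used above is added on the mm/ side of
this series and is invisible in this 'arch'-filtered view. A minimal
sketch of what split_huge_page_pmd_mm() can look like, reconstructed
from the interface change rather than quoted from the patch: resolve
the vma covering the address, then hand off to the vma-based helper.

	/* Sketch only: models the mm/huge_memory.c side of this series. */
	void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
				    pmd_t *pmd)
	{
		struct vm_area_struct *vma;

		/* The new interface wants a vma plus the faulting address,
		 * so look the vma up from the mm we were given. */
		vma = find_vma(mm, address);
		BUG_ON(vma == NULL);
		split_huge_page_pmd(vma, address, pmd);
	}
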
From 4b0ef1fe8a626f0ba7f649764f979d0dc9eab86b Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 12 Dec 2012 13:51:46 -0800
Subject: page_alloc: use N_MEMORY instead of N_HIGH_MEMORY; change the
 node_states initialization

N_HIGH_MEMORY stands for the nodes that have normal or high memory.
N_MEMORY stands for the nodes that have any memory. Since the code here
needs to handle the nodes which have memory, use N_MEMORY instead.

Since we introduced N_MEMORY, also update the initialization of
node_states.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/init_64.c |  4 +++-
 mm/page_alloc.c       | 40 ++++++++++++++++++++++------------------
 2 files changed, 25 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3baff255adac..2ead3c8a4c84 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -630,7 +630,9 @@ void __init paging_init(void)
 	 * numa support is not compiled in, and later node_set_state
 	 * will not set it back.
 	 */
-	node_clear_state(0, N_NORMAL_MEMORY);
+	node_clear_state(0, N_MEMORY);
+	if (N_MEMORY != N_NORMAL_MEMORY)
+		node_clear_state(0, N_NORMAL_MEMORY);
 
 	zone_sizes_init();
 }

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4171cd4f8257..35727168896b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1695,7 +1695,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
  *
  * If the zonelist cache is present in the passed in zonelist, then
  * returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
+ * tasks mems_allowed, or node_states[N_MEMORY].)
  *
  * If the zonelist cache is not available for this zonelist, does
  * nothing and returns NULL.
@@ -1724,7 +1724,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 
 	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
 					&cpuset_current_mems_allowed :
-					&node_states[N_HIGH_MEMORY];
+					&node_states[N_MEMORY];
 	return allowednodes;
 }
 
@@ -3238,7 +3238,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 		return node;
 	}
 
-	for_each_node_state(n, N_HIGH_MEMORY) {
+	for_each_node_state(n, N_MEMORY) {
 
 		/* Don't want a node to appear more than once */
 		if (node_isset(n, *used_node_mask))
@@ -3380,7 +3380,7 @@ static int default_zonelist_order(void)
 	 * local memory, NODE_ORDER may be suitable.
 	 */
 	average_size = total_size /
-				(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
+				(nodes_weight(node_states[N_MEMORY]) + 1);
 	for_each_online_node(nid) {
 		low_kmem_size = 0;
 		total_size = 0;
@@ -4731,7 +4731,7 @@ unsigned long __init find_min_pfn_with_active_regions(void)
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
- * Populate N_HIGH_MEMORY for calculating usable_nodes.
+ * Populate N_MEMORY for calculating usable_nodes.
  */
 static unsigned long __init early_calculate_totalpages(void)
 {
@@ -4744,7 +4744,7 @@ static unsigned long __init early_calculate_totalpages(void)
 		totalpages += pages;
 		if (pages)
-			node_set_state(nid, N_HIGH_MEMORY);
+			node_set_state(nid, N_MEMORY);
 	}
 	return totalpages;
 }
@@ -4761,9 +4761,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
 	/* save the state before borrow the nodemask */
-	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
+	nodemask_t saved_node_state = node_states[N_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
-	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
+	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
 
 	/*
 	 * If movablecore was specified, calculate what size of
@@ -4798,7 +4798,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 restart:
 	/* Spread kernelcore memory as evenly as possible throughout nodes */
 	kernelcore_node = required_kernelcore / usable_nodes;
-	for_each_node_state(nid, N_HIGH_MEMORY) {
+	for_each_node_state(nid, N_MEMORY) {
 		unsigned long start_pfn, end_pfn;
 
 		/*
@@ -4890,23 +4890,27 @@ restart:
 
 out:
 	/* restore the node_state */
-	node_states[N_HIGH_MEMORY] = saved_node_state;
+	node_states[N_MEMORY] = saved_node_state;
 }
 
-/* Any regular memory on that node ? */
-static void __init check_for_regular_memory(pg_data_t *pgdat)
+/* Any regular or high memory on that node ? */
+static void check_for_memory(pg_data_t *pgdat, int nid)
 {
-#ifdef CONFIG_HIGHMEM
 	enum zone_type zone_type;
 
-	for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+	if (N_MEMORY == N_NORMAL_MEMORY)
+		return;
+
+	for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
 		if (zone->present_pages) {
-			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+			node_set_state(nid, N_HIGH_MEMORY);
+			if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
+			    zone_type <= ZONE_NORMAL)
+				node_set_state(nid, N_NORMAL_MEMORY);
 			break;
 		}
 	}
-#endif
 }
 
 /**
@@ -4989,8 +4993,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 		/* Any memory on that node */
 		if (pgdat->node_present_pages)
-			node_set_state(nid, N_HIGH_MEMORY);
-		check_for_regular_memory(pgdat);
+			node_set_state(nid, N_MEMORY);
+		check_for_memory(pgdat, nid);
 	}
 }
--
cgit v1.2.3-70-g09d2
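
The new N_MEMORY checks above (for example "if (N_MEMORY ==
N_NORMAL_MEMORY) return;") are free at runtime: the node states are
enum values that alias one another when the corresponding config
options are off, so such comparisons are compile-time constants. A
sketch of the ordering this relies on, modeled on
include/linux/nodemask.h as it stands after this series (reproduced
from memory, so treat the exact comments and option names as
assumptions):

	enum node_states {
		N_POSSIBLE,		/* The node could become online at some point */
		N_ONLINE,		/* The node is online */
		N_NORMAL_MEMORY,	/* The node has regular memory */
	#ifdef CONFIG_HIGHMEM
		N_HIGH_MEMORY,		/* The node has regular or high memory */
	#else
		N_HIGH_MEMORY = N_NORMAL_MEMORY,
	#endif
	#ifdef CONFIG_MOVABLE_NODE
		N_MEMORY,		/* The node has memory (regular, high, movable) */
	#else
		N_MEMORY = N_HIGH_MEMORY,
	#endif
		N_CPU,			/* The node has one or more cpus */
		NR_NODE_STATES
	};

With CONFIG_HIGHMEM and CONFIG_MOVABLE_NODE both off, all three memory
states are the same value, so check_for_memory() returns immediately
and the extra node_set_state() calls are compiled out.
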
From c2d23f919bafcbc2259f5257d9a7d729802f0e3a Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 12 Dec 2012 13:52:10 -0800
Subject: mm, oom: remove statically defined arch functions of same name

out_of_memory() is a globally defined function to call the oom killer.
x86, sh, and powerpc all unnecessarily define a file-scope function of
the same name in their respective fault.c. Inline those functions into
the page fault handlers to clean the code up.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/fault.c | 27 ++++++++++++---------------
 arch/sh/mm/fault.c      | 19 +++++++------------
 arch/x86/mm/fault.c     | 23 ++++++++---------------
 3 files changed, 27 insertions(+), 42 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 0a6b28336eb0..3a8489a354e9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -113,19 +113,6 @@ static int store_updates_sp(struct pt_regs *regs)
 #define MM_FAULT_CONTINUE	-1
 #define MM_FAULT_ERR(sig)	(sig)
 
-static int out_of_memory(struct pt_regs *regs)
-{
-	/*
-	 * We ran out of memory, or some other thing happened to us that made
-	 * us unable to handle the page fault gracefully.
-	 */
-	up_read(&current->mm->mmap_sem);
-	if (!user_mode(regs))
-		return MM_FAULT_ERR(SIGKILL);
-	pagefault_out_of_memory();
-	return MM_FAULT_RETURN;
-}
-
 static int do_sigbus(struct pt_regs *regs, unsigned long address)
 {
 	siginfo_t info;
@@ -169,8 +156,18 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
 		return MM_FAULT_CONTINUE;
 
 	/* Out of memory */
-	if (fault & VM_FAULT_OOM)
-		return out_of_memory(regs);
+	if (fault & VM_FAULT_OOM) {
+		up_read(&current->mm->mmap_sem);
+
+		/*
+		 * We ran out of memory, or some other thing happened to us
+		 * that made us unable to handle the page fault gracefully.
+		 */
+		if (!user_mode(regs))
+			return MM_FAULT_ERR(SIGKILL);
+		pagefault_out_of_memory();
+		return MM_FAULT_RETURN;
+	}
 
 	/* Bus error. x86 handles HWPOISON here, we'll add this if/when
 	 * we support the feature in HW

diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index cbbdcad8fcb3..1f49c28affa9 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -301,17 +301,6 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 	__bad_area(regs, error_code, address, SEGV_ACCERR);
 }
 
-static void out_of_memory(void)
-{
-	/*
-	 * We ran out of memory, call the OOM killer, and return the userspace
-	 * (which will retry the fault, or kill us if we got oom-killed):
-	 */
-	up_read(&current->mm->mmap_sem);
-
-	pagefault_out_of_memory();
-}
-
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
@@ -353,8 +342,14 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 			no_context(regs, error_code, address);
 			return 1;
 		}
+		up_read(&current->mm->mmap_sem);
 
-		out_of_memory();
+		/*
+		 * We ran out of memory, call the OOM killer, and return the
+		 * userspace (which will retry the fault, or kill us if we got
+		 * oom-killed):
+		 */
+		pagefault_out_of_memory();
 	} else {
 		if (fault & VM_FAULT_SIGBUS)
 			do_sigbus(regs, error_code, address);

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7a529cbab7ad..027088f2f7dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -803,20 +803,6 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 	__bad_area(regs, error_code, address, SEGV_ACCERR);
 }
 
-/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
-static void
-out_of_memory(struct pt_regs *regs, unsigned long error_code,
-	      unsigned long address)
-{
-	/*
-	 * We ran out of memory, call the OOM killer, and return the userspace
-	 * (which will retry the fault, or kill us if we got oom-killed):
-	 */
-	up_read(&current->mm->mmap_sem);
-
-	pagefault_out_of_memory();
-}
-
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	  unsigned int fault)
@@ -879,7 +865,14 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 			return 1;
 		}
 
-		out_of_memory(regs, error_code, address);
+		up_read(&current->mm->mmap_sem);
+
+		/*
+		 * We ran out of memory, call the OOM killer, and return the
+		 * userspace (which will retry the fault, or kill us if we got
+		 * oom-killed):
+		 */
+		pagefault_out_of_memory();
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
 			     VM_FAULT_HWPOISON_LARGE))
--
cgit v1.2.3-70-g09d2
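
The cost of the duplicate names is readability rather than correctness:
in C, a file-scope static simply shadows the global of the same name
for its whole translation unit. A toy user-space illustration of the
trap (hypothetical file names, not kernel code):

	/* oom.c: the one global entry point, standing in for mm/oom_kill.c */
	#include <stdio.h>

	void out_of_memory(void)
	{
		puts("global OOM killer");
	}

	/* fault.c: a file-scope helper of the same name shadows the global */
	#include <stdio.h>

	static void out_of_memory(void)
	{
		puts("arch-local helper");
	}

	void handle_fault(void)
	{
		out_of_memory();	/* binds to the static above, never to oom.c's */
	}

Linking both files together is perfectly legal, which is exactly why
the shadowing is easy to miss when reading the arch fault handlers;
inlining the three copies removes the ambiguity.
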
From 816422ad76474fed8052b6f7b905a054d082e59a Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 12 Dec 2012 13:52:36 -0800
Subject: asm-generic, mm: pgtable: consolidate zero page helpers

We have two different implementations of the is_zero_pfn() and
my_zero_pfn() helpers: one for architectures with zero page coloring
and one for architectures without it. Let's consolidate them in
<asm-generic/pgtable.h>.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/pgtable.h | 11 +----------
 arch/s390/include/asm/pgtable.h | 11 +----------
 include/asm-generic/pgtable.h   | 26 ++++++++++++++++++++++++++
 include/linux/mm.h              |  8 --------
 mm/memory.c                     |  7 -------
 5 files changed, 28 insertions(+), 35 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index c02158be836c..14490e9443af 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -76,16 +76,7 @@ extern unsigned long zero_page_mask;
 
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-
-#define is_zero_pfn is_zero_pfn
-static inline int is_zero_pfn(unsigned long pfn)
-{
-	extern unsigned long zero_pfn;
-	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
-	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
-}
-
-#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
+#define __HAVE_COLOR_ZERO_PAGE
 
 extern void paging_init(void);

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2d3b7cb26005..c814e6f5b57d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -55,16 +55,7 @@ extern unsigned long zero_page_mask;
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + \
 	 (((unsigned long)(vaddr)) & zero_page_mask))))
-
-#define is_zero_pfn is_zero_pfn
-static inline int is_zero_pfn(unsigned long pfn)
-{
-	extern unsigned long zero_pfn;
-	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
-	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
-}
-
-#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
+#define __HAVE_COLOR_ZERO_PAGE
 
 #endif /* !__ASSEMBLY__ */
for untouched * mappings of /dev/null, all processes see the same page full of diff --git a/mm/memory.c b/mm/memory.c index f9a4b0cb8623..3f680e7c7645 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -717,13 +717,6 @@ static inline bool is_cow_mapping(vm_flags_t flags) return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } -#ifndef is_zero_pfn -static inline int is_zero_pfn(unsigned long pfn) -{ - return pfn == zero_pfn; -} -#endif - /* * vm_normal_page -- This function gets the "struct page" associated with a pte. * -- cgit v1.2.3-70-g09d2