Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 41
1 file changed, 26 insertions, 15 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 406a3c28c026..88c83c84325c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -34,6 +34,7 @@
 #include <linux/oom.h>
 #include <linux/numa.h>
 #include <linux/page_owner.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -1766,17 +1767,28 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	}
 #endif
 
-	/*
-	 * Avoid trapping faults against the zero page. The read-only
-	 * data is likely to be read-cached on the local CPU and
-	 * local/remote hits to the zero page are not interesting.
-	 */
-	if (prot_numa && is_huge_zero_pmd(*pmd))
-		goto unlock;
+	if (prot_numa) {
+		struct page *page;
+		/*
+		 * Avoid trapping faults against the zero page. The read-only
+		 * data is likely to be read-cached on the local CPU and
+		 * local/remote hits to the zero page are not interesting.
+		 */
+		if (is_huge_zero_pmd(*pmd))
+			goto unlock;
 
-	if (prot_numa && pmd_protnone(*pmd))
-		goto unlock;
+		if (pmd_protnone(*pmd))
+			goto unlock;
 
+		page = pmd_page(*pmd);
+		/*
+		 * Skip scanning top tier node if normal numa
+		 * balancing is disabled
+		 */
+		if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+		    node_is_toptier(page_to_nid(page)))
+			goto unlock;
+	}
 	/*
 	 * In case prot_numa, we are under mmap_read_lock(mm). It's critical
 	 * to not clear pmd intermittently to avoid race with MADV_DONTNEED
@@ -2055,9 +2067,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		young = pmd_young(old_pmd);
 		soft_dirty = pmd_soft_dirty(old_pmd);
 		uffd_wp = pmd_uffd_wp(old_pmd);
+		VM_BUG_ON_PAGE(!page_count(page), page);
+		page_ref_add(page, HPAGE_PMD_NR - 1);
 	}
-	VM_BUG_ON_PAGE(!page_count(page), page);
-	page_ref_add(page, HPAGE_PMD_NR - 1);
 
 	/*
 	 * Withdraw the table only after we mark the pmd entry invalid.
@@ -2953,7 +2965,6 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 	 */
 	for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) {
 		struct vm_area_struct *vma = find_vma(mm, addr);
-		unsigned int follflags;
 		struct page *page;
 
 		if (!vma || addr < vma->vm_start)
@@ -2966,8 +2977,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		}
 
 		/* FOLL_DUMP to ignore special (like zero) pages */
-		follflags = FOLL_GET | FOLL_DUMP;
-		page = follow_page(vma, addr, follflags);
+		page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
 
 		if (IS_ERR(page))
 			continue;
@@ -3197,7 +3207,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	if (pmd_swp_uffd_wp(*pvmw->pmd))
 		pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
 
-	flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
 	if (PageAnon(new))
 		page_add_anon_rmap(new, vma, mmun_start, true);
 	else
@@ -3205,6 +3214,8 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
 	if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
 		mlock_vma_page(new);
+
+	/* No need to invalidate - it was non-present before */
 	update_mmu_cache_pmd(vma, address, pvmw->pmd);
 }
 #endif
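For readability, here is the prot_numa block of change_huge_pmd() as it reads once the first hunk above is applied, reassembled straight from the diff (the surrounding function body is elided, and one extra comment is added for orientation):

	if (prot_numa) {
		struct page *page;
		/*
		 * Avoid trapping faults against the zero page. The read-only
		 * data is likely to be read-cached on the local CPU and
		 * local/remote hits to the zero page are not interesting.
		 */
		if (is_huge_zero_pmd(*pmd))
			goto unlock;

		/* Already PROT_NONE: a NUMA hint fault is pending, nothing to do. */
		if (pmd_protnone(*pmd))
			goto unlock;

		page = pmd_page(*pmd);
		/*
		 * Skip scanning top tier node if normal numa
		 * balancing is disabled
		 */
		if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
		    node_is_toptier(page_to_nid(page)))
			goto unlock;
	}

The net effect: when NUMA_BALANCING_NORMAL is clear in sysctl_numa_balancing_mode (for example, when only a memory-tiering mode bit is set; the exact flag, such as NUMA_BALANCING_MEMORY_TIERING from <linux/sched/sysctl.h>, is not shown in this diff), huge pages already resident on a top-tier node are left alone rather than made PROT_NONE, so no NUMA hint faults are taken for pages that cannot be promoted any further. The balancing mode is typically selected at runtime through the kernel.numa_balancing sysctl.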