Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c  134
1 file changed, 71 insertions(+), 63 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 43e1af868cfd..0858a1827207 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1355,6 +1355,10 @@ static struct folio *dequeue_hugetlb_folio_nodemask(struct hstate *h, gfp_t gfp_
struct zoneref *z;
int node = NUMA_NO_NODE;
+ /* 'nid' should not be NUMA_NO_NODE. Try to catch any misuse of it and rectify. */
+ if (nid == NUMA_NO_NODE)
+ nid = numa_node_id();
+
zonelist = node_zonelist(nid, gfp_mask);
retry_cpuset:
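
The new check above normalizes a NUMA_NO_NODE caller to the local node before the zonelist lookup. Below is a minimal userspace sketch of the same defensive pattern, assuming libnuma is available; numa_node_of_cpu(sched_getcpu()) plays the role of the kernel's numa_node_id(), and normalize_node() is a hypothetical helper used only for illustration.

#define _GNU_SOURCE
#include <numa.h>      /* numa_node_of_cpu() */
#include <sched.h>     /* sched_getcpu() */

#define NO_NODE (-1)   /* stand-in for the kernel's NUMA_NO_NODE */

/* Catch a "no node" sentinel before it reaches code that expects a
 * concrete node id, as the hunk above does. */
static int normalize_node(int nid)
{
        if (nid == NO_NODE)
                nid = numa_node_of_cpu(sched_getcpu());
        return nid;
}

int main(void)
{
        return normalize_node(NO_NODE) >= 0 ? 0 : 1;
}
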
@@ -2257,13 +2261,11 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
* pages is zero.
*/
static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
- gfp_t gfp_mask, int nid, nodemask_t *nmask,
- nodemask_t *node_alloc_noretry)
+ gfp_t gfp_mask, int nid, nodemask_t *nmask)
{
struct folio *folio;
- folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask,
- node_alloc_noretry);
+ folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
if (!folio)
return NULL;
@@ -2481,7 +2483,7 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
goto out_unlock;
spin_unlock_irq(&hugetlb_lock);
- folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+ folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
if (!folio)
return NULL;
@@ -2517,7 +2519,7 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
if (hstate_is_gigantic(h))
return NULL;
- folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+ folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
if (!folio)
return NULL;
@@ -2586,6 +2588,23 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask);
}
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+ struct mempolicy *mpol = get_task_policy(current);
+
+ /*
+ * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+ * (from policy_nodemask) specifically for hugetlb case
+ */
+ if (mpol->mode == MPOL_BIND &&
+ (apply_policy_zone(mpol, gfp_zone(gfp)) &&
+ cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
+ return &mpol->nodes;
+#endif
+ return NULL;
+}
+
/*
* Increase the hugetlb pool such that it can accommodate a reservation
* of size 'delta'.
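
policy_mbind_nodemask() is moved up so the surplus-allocation path below can consult a task's MPOL_BIND mask. A hedged userspace illustration of the behaviour this enables, assuming libnuma's numaif.h, a 2 MiB default huge page size, and hugepage overcommit configured via nr_overcommit_hugepages: after binding to node 0, any surplus huge page backing the reservation should come from node 0.

#define _GNU_SOURCE
#include <numaif.h>     /* set_mempolicy(), MPOL_BIND */
#include <sys/mman.h>   /* mmap(), MAP_HUGETLB */
#include <stdio.h>

int main(void)
{
        unsigned long nodemask = 1UL << 0;          /* allow node 0 only */

        /* Bind this task's allocations to node 0. */
        if (set_mempolicy(MPOL_BIND, &nodemask, 8 * sizeof(nodemask) + 1))
                perror("set_mempolicy");

        /* Reserving a 2 MiB huge page here can trigger the surplus
         * path; with the change above it respects the bind mask. */
        void *p = mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
        if (p == MAP_FAILED)
                perror("mmap(MAP_HUGETLB)");
        else
                printf("huge page mapping at %p\n", p);
        return 0;
}
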
@@ -2599,6 +2618,8 @@ static int gather_surplus_pages(struct hstate *h, long delta)
long i;
long needed, allocated;
bool alloc_ok = true;
+ int node;
+ nodemask_t *mbind_nodemask = policy_mbind_nodemask(htlb_alloc_mask(h));
lockdep_assert_held(&hugetlb_lock);
needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
@@ -2613,8 +2634,15 @@ static int gather_surplus_pages(struct hstate *h, long delta)
retry:
spin_unlock_irq(&hugetlb_lock);
for (i = 0; i < needed; i++) {
- folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
- NUMA_NO_NODE, NULL);
+ folio = NULL;
+ for_each_node_mask(node, cpuset_current_mems_allowed) {
+ if (!mbind_nodemask || node_isset(node, *mbind_nodemask)) {
+ folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
+ node, NULL);
+ if (folio)
+ break;
+ }
+ }
if (!folio) {
alloc_ok = false;
break;
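
With this change the surplus loop walks cpuset_current_mems_allowed and skips nodes excluded by the MPOL_BIND mask, instead of delegating the choice by passing NUMA_NO_NODE. A self-contained C sketch of that selection pattern, using hypothetical names (alloc_on_node() stands in for alloc_surplus_hugetlb_folio()):

#include <stdbool.h>
#include <stdlib.h>

#define MAX_NODES 8

/* Hypothetical per-node allocator; fails on odd nodes for demo purposes. */
static void *alloc_on_node(int node)
{
        return (node % 2 == 0) ? malloc(64) : NULL;
}

/* Walk the allowed nodes, skip any node outside an optional bind mask
 * (NULL means "no MPOL_BIND restriction"), and stop at the first node
 * that can satisfy the allocation -- the shape of the new loop above. */
static void *alloc_from_allowed(const bool allowed[MAX_NODES],
                                const bool *bind_mask)
{
        for (int node = 0; node < MAX_NODES; node++) {
                if (!allowed[node])
                        continue;
                if (bind_mask && !bind_mask[node])
                        continue;
                void *p = alloc_on_node(node);
                if (p)
                        return p;
        }
        return NULL;
}

int main(void)
{
        bool allowed[MAX_NODES] = { [0] = true, [2] = true, [4] = true };
        void *p = alloc_from_allowed(allowed, NULL);
        free(p);
        return p ? 0 : 1;
}
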
@@ -3439,7 +3467,7 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
- &node_states[N_MEMORY], NULL);
+ &node_states[N_MEMORY]);
if (!folio)
break;
free_huge_folio(folio); /* free it into the hugepage allocator */
@@ -4617,7 +4645,7 @@ void __init hugetlb_add_hstate(unsigned int order)
BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
BUG_ON(order < order_base_2(__NR_USED_SUBPAGE));
h = &hstates[hugetlb_max_hstate++];
- mutex_init(&h->resize_lock);
+ __mutex_init(&h->resize_lock, "resize mutex", &h->resize_key);
h->order = order;
h->mask = ~(huge_page_size(h) - 1);
for (i = 0; i < MAX_NUMNODES; ++i)
@@ -4840,23 +4868,6 @@ static int __init default_hugepagesz_setup(char *s)
}
__setup("default_hugepagesz=", default_hugepagesz_setup);
-static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
-{
-#ifdef CONFIG_NUMA
- struct mempolicy *mpol = get_task_policy(current);
-
- /*
- * Only enforce MPOL_BIND policy which overlaps with cpuset policy
- * (from policy_nodemask) specifically for hugetlb case
- */
- if (mpol->mode == MPOL_BIND &&
- (apply_policy_zone(mpol, gfp_zone(gfp)) &&
- cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
- return &mpol->nodes;
-#endif
- return NULL;
-}
-
static unsigned int allowed_mems_nr(struct hstate *h)
{
int node;
@@ -4875,7 +4886,7 @@ static unsigned int allowed_mems_nr(struct hstate *h)
}
#ifdef CONFIG_SYSCTL
-static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write,
+static int proc_hugetlb_doulongvec_minmax(const struct ctl_table *table, int write,
void *buffer, size_t *length,
loff_t *ppos, unsigned long *out)
{
@@ -4892,7 +4903,7 @@ static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write,
}
static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
- struct ctl_table *table, int write,
+ const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
@@ -5279,7 +5290,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
{
pte_t entry;
- entry = huge_pte_mkwrite(huge_pte_mkdirty(huge_ptep_get(ptep)));
+ entry = huge_pte_mkwrite(huge_pte_mkdirty(huge_ptep_get(vma->vm_mm, address, ptep)));
if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1))
update_mmu_cache(vma, address, ptep);
}
@@ -5387,7 +5398,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
dst_ptl = huge_pte_lock(h, dst, dst_pte);
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
- entry = huge_ptep_get(src_pte);
+ entry = huge_ptep_get(src_vma->vm_mm, addr, src_pte);
again:
if (huge_pte_none(entry)) {
/*
@@ -5425,7 +5436,7 @@ again:
set_huge_pte_at(dst, addr, dst_pte,
make_pte_marker(marker), sz);
} else {
- entry = huge_ptep_get(src_pte);
+ entry = huge_ptep_get(src_vma->vm_mm, addr, src_pte);
pte_folio = page_folio(pte_page(entry));
folio_get(pte_folio);
@@ -5454,9 +5465,8 @@ again:
ret = PTR_ERR(new_folio);
break;
}
- ret = copy_user_large_folio(new_folio,
- pte_folio,
- addr, dst_vma);
+ ret = copy_user_large_folio(new_folio, pte_folio,
+ ALIGN_DOWN(addr, sz), dst_vma);
folio_put(pte_folio);
if (ret) {
folio_put(new_folio);
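
copy_user_large_folio() now receives the start of the huge page (ALIGN_DOWN(addr, sz)) rather than the faulting address inside it. The rounding is plain power-of-two arithmetic; here is a standalone sketch of the calculation, with ALIGN_DOWN_POW2() as a stand-in that mirrors the kernel's ALIGN_DOWN() for power-of-two sizes.

#include <stdio.h>

/* Round x down to a power-of-two boundary a, as the kernel does with
 * the huge page size passed in as 'sz' above. */
#define ALIGN_DOWN_POW2(x, a)   ((x) & ~((unsigned long)(a) - 1))

int main(void)
{
        unsigned long sz = 2UL << 20;               /* 2 MiB huge page */
        unsigned long addr = 0x7f1234567890UL;      /* address inside the page */

        printf("%#lx -> %#lx\n", addr, ALIGN_DOWN_POW2(addr, sz));
        /* prints 0x7f1234567890 -> 0x7f1234400000 */
        return 0;
}
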
@@ -5467,7 +5477,7 @@ again:
dst_ptl = huge_pte_lock(h, dst, dst_pte);
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
- entry = huge_ptep_get(src_pte);
+ entry = huge_ptep_get(src_vma->vm_mm, addr, src_pte);
if (!pte_same(src_pte_old, entry)) {
restore_reserve_on_error(h, dst_vma, addr,
new_folio);
@@ -5577,7 +5587,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
new_addr |= last_addr_mask;
continue;
}
- if (huge_pte_none(huge_ptep_get(src_pte)))
+ if (huge_pte_none(huge_ptep_get(mm, old_addr, src_pte)))
continue;
if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
@@ -5650,7 +5660,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
continue;
}
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(mm, address, ptep);
if (huge_pte_none(pte)) {
spin_unlock(ptl);
continue;
@@ -5899,7 +5909,7 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
- pte_t pte = huge_ptep_get(vmf->pte);
+ pte_t pte = huge_ptep_get(mm, vmf->address, vmf->pte);
struct hstate *h = hstate_vma(vma);
struct folio *old_folio;
struct folio *new_folio;
@@ -6020,7 +6030,7 @@ retry_avoidcopy:
vmf->pte = hugetlb_walk(vma, vmf->address,
huge_page_size(h));
if (likely(vmf->pte &&
- pte_same(huge_ptep_get(vmf->pte), pte)))
+ pte_same(huge_ptep_get(mm, vmf->address, vmf->pte), pte)))
goto retry_avoidcopy;
/*
* race occurs while re-acquiring page table
@@ -6058,7 +6068,7 @@ retry_avoidcopy:
*/
spin_lock(vmf->ptl);
vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
- if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
+ if (likely(vmf->pte && pte_same(huge_ptep_get(mm, vmf->address, vmf->pte), pte))) {
pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
/* Break COW or unshare */
@@ -6159,14 +6169,14 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
* Recheck pte with pgtable lock. Returns true if pte didn't change, or
* false if pte changed or is changing.
*/
-static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm,
+static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t old_pte)
{
spinlock_t *ptl;
bool same;
ptl = huge_pte_lock(h, mm, ptep);
- same = pte_same(huge_ptep_get(ptep), old_pte);
+ same = pte_same(huge_ptep_get(mm, addr, ptep), old_pte);
spin_unlock(ptl);
return same;
@@ -6227,7 +6237,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
* never happen on the page after UFFDIO_COPY has
* correctly installed the page and returned.
*/
- if (!hugetlb_pte_stable(h, mm, vmf->pte, vmf->orig_pte)) {
+ if (!hugetlb_pte_stable(h, mm, vmf->address, vmf->pte, vmf->orig_pte)) {
ret = 0;
goto out;
}
@@ -6256,14 +6266,13 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
* here. Before returning error, get ptl and make
* sure there really is no pte entry.
*/
- if (hugetlb_pte_stable(h, mm, vmf->pte, vmf->orig_pte))
+ if (hugetlb_pte_stable(h, mm, vmf->address, vmf->pte, vmf->orig_pte))
ret = vmf_error(PTR_ERR(folio));
else
ret = 0;
goto out;
}
- clear_huge_page(&folio->page, vmf->real_address,
- pages_per_huge_page(h));
+ folio_zero_user(folio, vmf->real_address);
__folio_mark_uptodate(folio);
new_folio = true;
@@ -6306,7 +6315,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
folio_unlock(folio);
folio_put(folio);
/* See comment in userfaultfd_missing() block above */
- if (!hugetlb_pte_stable(h, mm, vmf->pte, vmf->orig_pte)) {
+ if (!hugetlb_pte_stable(h, mm, vmf->address, vmf->pte, vmf->orig_pte)) {
ret = 0;
goto out;
}
@@ -6333,7 +6342,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
vmf->ptl = huge_pte_lock(h, mm, vmf->pte);
ret = 0;
/* If pte changed from under us, retry */
- if (!pte_same(huge_ptep_get(vmf->pte), vmf->orig_pte))
+ if (!pte_same(huge_ptep_get(mm, vmf->address, vmf->pte), vmf->orig_pte))
goto backout;
if (anon_rmap)
@@ -6454,7 +6463,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
}
- vmf.orig_pte = huge_ptep_get(vmf.pte);
+ vmf.orig_pte = huge_ptep_get(mm, vmf.address, vmf.pte);
if (huge_pte_none_mostly(vmf.orig_pte)) {
if (is_pte_marker(vmf.orig_pte)) {
pte_marker marker =
@@ -6495,7 +6504,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* be released there.
*/
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- migration_entry_wait_huge(vma, vmf.pte);
+ migration_entry_wait_huge(vma, vmf.address, vmf.pte);
return 0;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(vmf.orig_pte)))
ret = VM_FAULT_HWPOISON_LARGE |
@@ -6528,11 +6537,11 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
vmf.ptl = huge_pte_lock(h, mm, vmf.pte);
/* Check for a racing update before calling hugetlb_wp() */
- if (unlikely(!pte_same(vmf.orig_pte, huge_ptep_get(vmf.pte))))
+ if (unlikely(!pte_same(vmf.orig_pte, huge_ptep_get(mm, vmf.address, vmf.pte))))
goto out_ptl;
/* Handle userfault-wp first, before trying to lock more pages */
- if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(vmf.pte)) &&
+ if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(mm, vmf.address, vmf.pte)) &&
(flags & FAULT_FLAG_WRITE) && !huge_pte_write(vmf.orig_pte)) {
if (!userfaultfd_wp_async(vma)) {
spin_unlock(vmf.ptl);
@@ -6647,7 +6656,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
struct hstate *h = hstate_vma(dst_vma);
struct address_space *mapping = dst_vma->vm_file->f_mapping;
pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
- unsigned long size;
+ unsigned long size = huge_page_size(h);
int vm_shared = dst_vma->vm_flags & VM_SHARED;
pte_t _dst_pte;
spinlock_t *ptl;
@@ -6660,14 +6669,13 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
ptl = huge_pte_lock(h, dst_mm, dst_pte);
/* Don't overwrite any existing PTEs (even markers) */
- if (!huge_pte_none(huge_ptep_get(dst_pte))) {
+ if (!huge_pte_none(huge_ptep_get(dst_mm, dst_addr, dst_pte))) {
spin_unlock(ptl);
return -EEXIST;
}
_dst_pte = make_pte_marker(PTE_MARKER_POISONED);
- set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte,
- huge_page_size(h));
+ set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, size);
/* No need to invalidate - it was non-present before */
update_mmu_cache(dst_vma, dst_addr, dst_pte);
@@ -6741,7 +6749,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
*foliop = NULL;
goto out;
}
- ret = copy_user_large_folio(folio, *foliop, dst_addr, dst_vma);
+ ret = copy_user_large_folio(folio, *foliop,
+ ALIGN_DOWN(dst_addr, size), dst_vma);
folio_put(*foliop);
*foliop = NULL;
if (ret) {
@@ -6768,9 +6777,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
/* Add shared, newly allocated pages to the page cache. */
if (vm_shared && !is_continue) {
- size = i_size_read(mapping->host) >> huge_page_shift(h);
ret = -EFAULT;
- if (idx >= size)
+ if (idx >= (i_size_read(mapping->host) >> huge_page_shift(h)))
goto out_release_nounlock;
/*
@@ -6797,7 +6805,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
* page backing it, then access the page.
*/
ret = -EEXIST;
- if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
+ if (!huge_pte_none_mostly(huge_ptep_get(dst_mm, dst_addr, dst_pte)))
goto out_release_unlock;
if (folio_in_pagecache)
@@ -6827,7 +6835,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
if (wp_enabled)
_dst_pte = huge_pte_mkuffd_wp(_dst_pte);
- set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, huge_page_size(h));
+ set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, size);
hugetlb_count_add(pages_per_huge_page(h), dst_mm);
@@ -6918,7 +6926,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
address |= last_addr_mask;
continue;
}
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(mm, address, ptep);
if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
/* Nothing to do. */
} else if (unlikely(is_hugetlb_entry_migration(pte))) {