Diffstat (limited to 'mm')
-rw-r--r--  mm/damon/tests/sysfs-kunit.h |  1
-rw-r--r--  mm/huge_memory.c             | 15
-rw-r--r--  mm/kasan/init.c              |  8
-rw-r--r--  mm/khugepaged.c              |  6
-rw-r--r--  mm/memory.c                  | 17
-rw-r--r--  mm/mmap.c                    | 32
-rw-r--r--  mm/mremap.c                  | 11
-rw-r--r--  mm/secretmem.c               |  4
-rw-r--r--  mm/shmem.c                   |  7
-rw-r--r--  mm/sparse-vmemmap.c          |  5
-rw-r--r--  mm/swapfile.c                |  9
-rw-r--r--  mm/vmscan.c                  |  4
-rw-r--r--  mm/zswap.c                   |  1
13 files changed, 69 insertions(+), 51 deletions(-)
diff --git a/mm/damon/tests/sysfs-kunit.h b/mm/damon/tests/sysfs-kunit.h
index 1c9b596057a7..7b5c7b307da9 100644
--- a/mm/damon/tests/sysfs-kunit.h
+++ b/mm/damon/tests/sysfs-kunit.h
@@ -67,6 +67,7 @@ static void damon_sysfs_test_add_targets(struct kunit *test)
damon_destroy_ctx(ctx);
kfree(sysfs_targets->targets_arr);
kfree(sysfs_targets);
+ kfree(sysfs_target->regions);
kfree(sysfs_target);
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3ca89e0279a7..2fb328880b50 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -109,18 +109,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
if (!vma->vm_mm) /* vdso */
return 0;
- /*
- * Explicitly disabled through madvise or prctl, or some
- * architectures may disable THP for some mappings, for
- * example, s390 kvm.
- * */
- if ((vm_flags & VM_NOHUGEPAGE) ||
- test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
- return 0;
- /*
- * If the hardware/firmware marked hugepage support disabled.
- */
- if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
+ if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags))
return 0;
/* khugepaged doesn't collapse DAX vma, but page fault is fine. */
@@ -1586,7 +1575,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
int ret = -ENOMEM;
pmd = pmdp_get_lockless(src_pmd);
- if (unlikely(pmd_special(pmd))) {
+ if (unlikely(pmd_present(pmd) && pmd_special(pmd))) {
dst_ptl = pmd_lock(dst_mm, dst_pmd);
src_ptl = pmd_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index 89895f38f722..ac607c306292 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -106,6 +106,10 @@ static void __ref zero_pte_populate(pmd_t *pmd, unsigned long addr,
}
}
+void __weak __meminit kernel_pte_init(void *addr)
+{
+}
+
static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
unsigned long end)
{
@@ -126,8 +130,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
if (slab_is_available())
p = pte_alloc_one_kernel(&init_mm);
- else
+ else {
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ kernel_pte_init(p);
+ }
if (!p)
return -ENOMEM;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f9c39898eaff..b538c3d48386 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2227,7 +2227,7 @@ rollback:
folio_put(new_folio);
out:
VM_BUG_ON(!list_empty(&pagelist));
- trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result);
+ trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem, file, HPAGE_PMD_NR, result);
return result;
}
@@ -2252,7 +2252,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
continue;
if (xa_is_value(folio)) {
- ++swap;
+ swap += 1 << xas_get_order(&xas);
if (cc->is_khugepaged &&
swap > khugepaged_max_ptes_swap) {
result = SCAN_EXCEED_SWAP_PTE;
@@ -2299,7 +2299,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
* is just too costly...
*/
- present++;
+ present += folio_nr_pages(folio);
if (need_resched()) {
xas_pause(&xas);
diff --git a/mm/memory.c b/mm/memory.c
index 2366578015ad..3ccee51adfbb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4181,11 +4181,6 @@ fallback:
return __alloc_swap_folio(vmf);
}
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
-static inline bool can_swapin_thp(struct vm_fault *vmf, pte_t *ptep, int nr_pages)
-{
- return false;
-}
-
static struct folio *alloc_swap_folio(struct vm_fault *vmf)
{
return __alloc_swap_folio(vmf);
@@ -4925,6 +4920,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
pmd_t entry;
vm_fault_t ret = VM_FAULT_FALLBACK;
+ /*
+ * It is too late to allocate a small folio, we already have a large
+ * folio in the pagecache: especially s390 KVM cannot tolerate any
+ * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any
+ * PMD mappings if THPs are disabled.
+ */
+ if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags))
+ return ret;
+
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
return ret;
@@ -6346,7 +6350,8 @@ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
{
#ifdef CONFIG_LOCKDEP
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct file *file = vma->vm_file;
+ struct address_space *mapping = file ? file->f_mapping : NULL;
if (mapping)
lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
diff --git a/mm/mmap.c b/mm/mmap.c
index dd4b35a25aeb..9c0fb43064b5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1371,7 +1371,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
struct maple_tree mt_detach;
unsigned long end = addr + len;
bool writable_file_mapping = false;
- int error = -ENOMEM;
+ int error;
VMA_ITERATOR(vmi, mm, addr);
VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);
@@ -1396,8 +1396,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}
/* Check against address space limit. */
- if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages))
+ if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) {
+ error = -ENOMEM;
goto abort_munmap;
+ }
/*
* Private writable mapping: check memory availability
@@ -1405,8 +1407,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (accountable_mapping(file, vm_flags)) {
charged = pglen;
charged -= vms.nr_accounted;
- if (charged && security_vm_enough_memory_mm(mm, charged))
- goto abort_munmap;
+ if (charged) {
+ error = security_vm_enough_memory_mm(mm, charged);
+ if (error)
+ goto abort_munmap;
+ }
vms.nr_accounted = 0;
vm_flags |= VM_ACCOUNT;
@@ -1422,8 +1427,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* not unmapped, but the maps are removed from the list.
*/
vma = vm_area_alloc(mm);
- if (!vma)
+ if (!vma) {
+ error = -ENOMEM;
goto unacct_error;
+ }
vma_iter_config(&vmi, addr, end);
vma_set_range(vma, addr, end, pgoff);
@@ -1453,9 +1460,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* Expansion is handled above, merging is handled below.
* Drivers should not alter the address of the VMA.
*/
- error = -EINVAL;
- if (WARN_ON((addr != vma->vm_start)))
+ if (WARN_ON((addr != vma->vm_start))) {
+ error = -EINVAL;
goto close_and_free_vma;
+ }
vma_iter_config(&vmi, addr, end);
/*
@@ -1500,13 +1508,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}
/* Allow architectures to sanity-check the vm_flags */
- error = -EINVAL;
- if (!arch_validate_flags(vma->vm_flags))
+ if (!arch_validate_flags(vma->vm_flags)) {
+ error = -EINVAL;
goto close_and_free_vma;
+ }
- error = -ENOMEM;
- if (vma_iter_prealloc(&vmi, vma))
+ if (vma_iter_prealloc(&vmi, vma)) {
+ error = -ENOMEM;
goto close_and_free_vma;
+ }
/* Lock the VMA since it is modified after insertion into VMA tree */
vma_start_write(vma);
diff --git a/mm/mremap.c b/mm/mremap.c
index 24712f8dbb6b..dda09e957a5d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -238,6 +238,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
{
spinlock_t *old_ptl, *new_ptl;
struct mm_struct *mm = vma->vm_mm;
+ bool res = false;
pmd_t pmd;
if (!arch_supports_page_table_move())
@@ -277,19 +278,25 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
- /* Clear the pmd */
pmd = *old_pmd;
+
+ /* Racing with collapse? */
+ if (unlikely(!pmd_present(pmd) || pmd_leaf(pmd)))
+ goto out_unlock;
+ /* Clear the pmd */
pmd_clear(old_pmd);
+ res = true;
VM_BUG_ON(!pmd_none(*new_pmd));
pmd_populate(mm, new_pmd, pmd_pgtable(pmd));
flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+out_unlock:
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
spin_unlock(old_ptl);
- return true;
+ return res;
}
#else
static inline bool move_normal_pmd(struct vm_area_struct *vma,
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 3afb5ad701e1..399552814fd0 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -238,7 +238,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
/* make sure local flags do not confict with global fcntl.h */
BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
- if (!secretmem_enable)
+ if (!secretmem_enable || !can_set_direct_map())
return -ENOSYS;
if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
@@ -280,7 +280,7 @@ static struct file_system_type secretmem_fs = {
static int __init secretmem_init(void)
{
- if (!secretmem_enable)
+ if (!secretmem_enable || !can_set_direct_map())
return 0;
secretmem_mnt = kern_mount(&secretmem_fs);
diff --git a/mm/shmem.c b/mm/shmem.c
index 4f11b5506363..c5adb987b23c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1664,12 +1664,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
loff_t i_size;
int order;
- if (vma && ((vm_flags & VM_NOHUGEPAGE) ||
- test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)))
- return 0;
-
- /* If the hardware/firmware marked hugepage support disabled. */
- if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
+ if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
return 0;
global_huge = shmem_huge_global_enabled(inode, index, write_end,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index edcc7a6b0f6f..c0388b2e959d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -184,6 +184,10 @@ static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
return p;
}
+void __weak __meminit kernel_pte_init(void *addr)
+{
+}
+
pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
pmd_t *pmd = pmd_offset(pud, addr);
@@ -191,6 +195,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
+ kernel_pte_init(p);
pmd_populate_kernel(&init_mm, pmd, p);
}
return pmd;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0cded32414a1..b0915f3fab31 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -194,9 +194,6 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
if (IS_ERR(folio))
return 0;
- /* offset could point to the middle of a large folio */
- entry = folio->swap;
- offset = swp_offset(entry);
nr_pages = folio_nr_pages(folio);
ret = -nr_pages;
@@ -210,6 +207,10 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
if (!folio_trylock(folio))
goto out;
+ /* offset could point to the middle of a large folio */
+ entry = folio->swap;
+ offset = swp_offset(entry);
+
need_reclaim = ((flags & TTRS_ANYWAY) ||
((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||
((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)));
@@ -2312,7 +2313,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- if (vma->anon_vma) {
+ if (vma->anon_vma && !is_vm_hugetlb_page(vma)) {
ret = unuse_vma(vma, type);
if (ret)
break;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 749cdc110c74..eb4e8440c507 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4963,8 +4963,8 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
blk_finish_plug(&plug);
done:
- /* kswapd should never fail */
- pgdat->kswapd_failures = 0;
+ if (sc->nr_reclaimed > reclaimed)
+ pgdat->kswapd_failures = 0;
}
/******************************************************************************
diff --git a/mm/zswap.c b/mm/zswap.c
index 449914ea9919..162013952074 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -190,7 +190,6 @@ static struct shrinker *zswap_shrinker;
* section for context.
* pool - the zswap_pool the entry's data is in
* handle - zpool allocation handle that stores the compressed page data
- * value - value of the same-value filled pages which have same content
* objcg - the obj_cgroup that the compressed memory is charged to
* lru - handle to the pool's lru used to evict pages.
*/