diff options
author | Mark Brown <broonie@kernel.org> | 2021-06-23 16:31:14 +0100 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2021-06-23 16:31:14 +0100 |
commit | 8cc802bd75fbf840635e7d4d48050bbcab4d938d (patch) | |
tree | 4d7983db2b66d416dcf2deb7b4c53f6ceb5b7f0d /mm | |
parent | f5e2d697d3cbd6d20684eddd3e280809c30e37a1 (diff) | |
parent | 8e0eb2fb5c0732a6fa53f2df7079754152857c24 (diff) |
Merge series "ASoC: tlv320aic32x4: Add support for TAS2505" from Claudius Heine <ch@denx.de>:
Hi,
this is v2 from my patchset that add support for the TAS2505 to the tlv320aic32x4 driver.
kind regards,
Claudius
Changes from v1:
- clarified commit message of first patch, which add the type value to the struct
- removed unnecessary code to put and get speaker volume
- removed 'Gain' from 'HP Driver Playback Volume' control
- fixed rebase issues
Claudius Heine (3):
ASoC: tlv320aic32x4: add type to device private data struct
ASoC: tlv320aic32x4: add support for TAS2505
ASoC: tlv320aic32x4: dt-bindings: add TAS2505 to compatible
.../bindings/sound/tlv320aic32x4.txt | 1 +
sound/soc/codecs/tlv320aic32x4-i2c.c | 22 ++-
sound/soc/codecs/tlv320aic32x4-spi.c | 23 ++-
sound/soc/codecs/tlv320aic32x4.c | 139 +++++++++++++++++-
sound/soc/codecs/tlv320aic32x4.h | 10 ++
5 files changed, 186 insertions(+), 9 deletions(-)
base-commit: 70585216fe7730d9fb5453d3e2804e149d0fe201
--
2.32.0
Diffstat (limited to 'mm')
-rw-r--r-- | mm/debug_vm_pgtable.c | 4 | ||||
-rw-r--r-- | mm/gup.c | 4 | ||||
-rw-r--r-- | mm/huge_memory.c | 56 | ||||
-rw-r--r-- | mm/hugetlb.c | 152 | ||||
-rw-r--r-- | mm/internal.h | 73 | ||||
-rw-r--r-- | mm/ioremap.c | 6 | ||||
-rw-r--r-- | mm/kasan/init.c | 4 | ||||
-rw-r--r-- | mm/kfence/core.c | 6 | ||||
-rw-r--r-- | mm/ksm.c | 3 | ||||
-rw-r--r-- | mm/memory-failure.c | 36 | ||||
-rw-r--r-- | mm/memory.c | 45 | ||||
-rw-r--r-- | mm/migrate.c | 1 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 | ||||
-rw-r--r-- | mm/page_vma_mapped.c | 27 | ||||
-rw-r--r-- | mm/pgtable-generic.c | 5 | ||||
-rw-r--r-- | mm/rmap.c | 39 | ||||
-rw-r--r-- | mm/shmem.c | 34 | ||||
-rw-r--r-- | mm/shuffle.h | 4 | ||||
-rw-r--r-- | mm/slab_common.c | 13 | ||||
-rw-r--r-- | mm/slub.c | 47 | ||||
-rw-r--r-- | mm/sparse.c | 13 | ||||
-rw-r--r-- | mm/swapfile.c | 2 | ||||
-rw-r--r-- | mm/truncate.c | 43 | ||||
-rw-r--r-- | mm/userfaultfd.c | 28 |
24 files changed, 423 insertions, 224 deletions
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 05efe98a9ac2..297d1b349c19 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -192,7 +192,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm, pr_debug("Validating PMD advanced\n"); /* Align the address wrt HPAGE_PMD_SIZE */ - vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE; + vaddr &= HPAGE_PMD_MASK; pgtable_trans_huge_deposit(mm, pmdp, pgtable); @@ -330,7 +330,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm, pr_debug("Validating PUD advanced\n"); /* Align the address wrt HPAGE_PUD_SIZE */ - vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE; + vaddr &= HPAGE_PUD_MASK; set_pud_at(mm, vaddr, pudp, pud); pudp_set_wrprotect(mm, vaddr, pudp); @@ -1593,10 +1593,6 @@ struct page *get_dump_page(unsigned long addr) FOLL_FORCE | FOLL_DUMP | FOLL_GET); if (locked) mmap_read_unlock(mm); - - if (ret == 1 && is_page_poisoned(page)) - return NULL; - return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 63ed6b25deaa..6d2a0119fc58 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker; static atomic_t huge_zero_refcount; struct page *huge_zero_page __read_mostly; +unsigned long huge_zero_pfn __read_mostly = ~0UL; bool transparent_hugepage_enabled(struct vm_area_struct *vma) { @@ -98,6 +99,7 @@ retry: __free_pages(zero_page, compound_order(zero_page)); goto retry; } + WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); /* We take additional reference here. It will be put back by shrinker */ atomic_set(&huge_zero_refcount, 2); @@ -147,6 +149,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); + WRITE_ONCE(huge_zero_pfn, ~0UL); __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } @@ -2044,7 +2047,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, count_vm_event(THP_SPLIT_PMD); if (!vma_is_anonymous(vma)) { - _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); + old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); /* * We are going to unmap this huge page. So * just go ahead and zap it @@ -2053,16 +2056,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, zap_deposited_table(mm, pmd); if (vma_is_special_huge(vma)) return; - page = pmd_page(_pmd); - if (!PageDirty(page) && pmd_dirty(_pmd)) - set_page_dirty(page); - if (!PageReferenced(page) && pmd_young(_pmd)) - SetPageReferenced(page); - page_remove_rmap(page, true); - put_page(page); + if (unlikely(is_pmd_migration_entry(old_pmd))) { + swp_entry_t entry; + + entry = pmd_to_swp_entry(old_pmd); + page = migration_entry_to_page(entry); + } else { + page = pmd_page(old_pmd); + if (!PageDirty(page) && pmd_dirty(old_pmd)) + set_page_dirty(page); + if (!PageReferenced(page) && pmd_young(old_pmd)) + SetPageReferenced(page); + page_remove_rmap(page, true); + put_page(page); + } add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { + } + + if (is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside @@ -2338,17 +2350,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, static void unmap_page(struct page *page) { - enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; - bool unmap_success; VM_BUG_ON_PAGE(!PageHead(page), page); if (PageAnon(page)) ttu_flags |= TTU_SPLIT_FREEZE; - unmap_success = try_to_unmap(page, ttu_flags); - VM_BUG_ON_PAGE(!unmap_success, page); + try_to_unmap(page, ttu_flags); + + VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); } static void remap_page(struct page *page, unsigned int nr) @@ -2659,7 +2671,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) struct deferred_split *ds_queue = get_deferred_split_queue(head); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; - int count, mapcount, extra_pins, ret; + int extra_pins, ret; pgoff_t end; VM_BUG_ON_PAGE(is_huge_zero_page(head), head); @@ -2718,7 +2730,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } unmap_page(head); - VM_BUG_ON_PAGE(compound_mapcount(head), head); /* block interrupt reentry in xa_lock and spinlock */ local_irq_disable(); @@ -2736,9 +2747,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) /* Prevent deferred_split_scan() touching ->_refcount */ spin_lock(&ds_queue->split_queue_lock); - count = page_count(head); - mapcount = total_mapcount(head); - if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { + if (page_ref_freeze(head, 1 + extra_pins)) { if (!list_empty(page_deferred_list(head))) { ds_queue->split_queue_len--; list_del(page_deferred_list(head)); @@ -2758,16 +2767,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) __split_huge_page(page, list, end); ret = 0; } else { - if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { - pr_alert("total_mapcount: %u, page_count(): %u\n", - mapcount, count); - if (PageTail(page)) - dump_page(head, NULL); - dump_page(page, "total_mapcount(head) > 0"); - BUG(); - } spin_unlock(&ds_queue->split_queue_lock); -fail: if (mapping) +fail: + if (mapping) xa_unlock(&mapping->i_pages); local_irq_enable(); remap_page(head, thp_nr_pages(head)); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3db405dea3dc..e0a5f9cbbece 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1793,7 +1793,7 @@ retry: SetPageHWPoison(page); ClearPageHWPoison(head); } - remove_hugetlb_page(h, page, false); + remove_hugetlb_page(h, head, false); h->max_huge_pages--; spin_unlock_irq(&hugetlb_lock); update_and_free_page(h, head); @@ -2121,12 +2121,18 @@ out: * be restored when a newly allocated huge page must be freed. It is * to be called after calling vma_needs_reservation to determine if a * reservation exists. + * + * vma_del_reservation is used in error paths where an entry in the reserve + * map was created during huge page allocation and must be removed. It is to + * be called after calling vma_needs_reservation to determine if a reservation + * exists. */ enum vma_resv_mode { VMA_NEEDS_RESV, VMA_COMMIT_RESV, VMA_END_RESV, VMA_ADD_RESV, + VMA_DEL_RESV, }; static long __vma_reservation_common(struct hstate *h, struct vm_area_struct *vma, unsigned long addr, @@ -2170,11 +2176,21 @@ static long __vma_reservation_common(struct hstate *h, ret = region_del(resv, idx, idx + 1); } break; + case VMA_DEL_RESV: + if (vma->vm_flags & VM_MAYSHARE) { + region_abort(resv, idx, idx + 1, 1); + ret = region_del(resv, idx, idx + 1); + } else { + ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); + /* region_add calls of range 1 should never fail. */ + VM_BUG_ON(ret < 0); + } + break; default: BUG(); } - if (vma->vm_flags & VM_MAYSHARE) + if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV) return ret; /* * We know private mapping must have HPAGE_RESV_OWNER set. @@ -2222,25 +2238,39 @@ static long vma_add_reservation(struct hstate *h, return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV); } +static long vma_del_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) +{ + return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV); +} + /* - * This routine is called to restore a reservation on error paths. In the - * specific error paths, a huge page was allocated (via alloc_huge_page) - * and is about to be freed. If a reservation for the page existed, - * alloc_huge_page would have consumed the reservation and set - * HPageRestoreReserve in the newly allocated page. When the page is freed - * via free_huge_page, the global reservation count will be incremented if - * HPageRestoreReserve is set. However, free_huge_page can not adjust the - * reserve map. Adjust the reserve map here to be consistent with global - * reserve count adjustments to be made by free_huge_page. + * This routine is called to restore reservation information on error paths. + * It should ONLY be called for pages allocated via alloc_huge_page(), and + * the hugetlb mutex should remain held when calling this routine. + * + * It handles two specific cases: + * 1) A reservation was in place and the page consumed the reservation. + * HPageRestoreReserve is set in the page. + * 2) No reservation was in place for the page, so HPageRestoreReserve is + * not set. However, alloc_huge_page always updates the reserve map. + * + * In case 1, free_huge_page later in the error path will increment the + * global reserve count. But, free_huge_page does not have enough context + * to adjust the reservation map. This case deals primarily with private + * mappings. Adjust the reserve map here to be consistent with global + * reserve count adjustments to be made by free_huge_page. Make sure the + * reserve map indicates there is a reservation present. + * + * In case 2, simply undo reserve map modifications done by alloc_huge_page. */ -static void restore_reserve_on_error(struct hstate *h, - struct vm_area_struct *vma, unsigned long address, - struct page *page) +void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, + unsigned long address, struct page *page) { - if (unlikely(HPageRestoreReserve(page))) { - long rc = vma_needs_reservation(h, vma, address); + long rc = vma_needs_reservation(h, vma, address); - if (unlikely(rc < 0)) { + if (HPageRestoreReserve(page)) { + if (unlikely(rc < 0)) /* * Rare out of memory condition in reserve map * manipulation. Clear HPageRestoreReserve so that @@ -2253,16 +2283,57 @@ static void restore_reserve_on_error(struct hstate *h, * accounting of reserve counts. */ ClearHPageRestoreReserve(page); - } else if (rc) { - rc = vma_add_reservation(h, vma, address); - if (unlikely(rc < 0)) + else if (rc) + (void)vma_add_reservation(h, vma, address); + else + vma_end_reservation(h, vma, address); + } else { + if (!rc) { + /* + * This indicates there is an entry in the reserve map + * added by alloc_huge_page. We know it was added + * before the alloc_huge_page call, otherwise + * HPageRestoreReserve would be set on the page. + * Remove the entry so that a subsequent allocation + * does not consume a reservation. + */ + rc = vma_del_reservation(h, vma, address); + if (rc < 0) /* - * See above comment about rare out of - * memory condition. + * VERY rare out of memory condition. Since + * we can not delete the entry, set + * HPageRestoreReserve so that the reserve + * count will be incremented when the page + * is freed. This reserve will be consumed + * on a subsequent allocation. */ - ClearHPageRestoreReserve(page); + SetHPageRestoreReserve(page); + } else if (rc < 0) { + /* + * Rare out of memory condition from + * vma_needs_reservation call. Memory allocation is + * only attempted if a new entry is needed. Therefore, + * this implies there is not an entry in the + * reserve map. + * + * For shared mappings, no entry in the map indicates + * no reservation. We are done. + */ + if (!(vma->vm_flags & VM_MAYSHARE)) + /* + * For private mappings, no entry indicates + * a reservation is present. Since we can + * not add an entry, set SetHPageRestoreReserve + * on the page so reserve count will be + * incremented when freed. This reserve will + * be consumed on a subsequent allocation. + */ + SetHPageRestoreReserve(page); } else - vma_end_reservation(h, vma, address); + /* + * No reservation present, do nothing + */ + vma_end_reservation(h, vma, address); } } @@ -4037,6 +4108,8 @@ again: spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); if (!pte_same(src_pte_old, entry)) { + restore_reserve_on_error(h, vma, addr, + new); put_page(new); /* dst_entry won't change as in child */ goto again; @@ -4056,6 +4129,7 @@ again: * See Documentation/vm/mmu_notifier.rst */ huge_ptep_set_wrprotect(src, addr, src_pte); + entry = huge_pte_wrprotect(entry); } page_dup_rmap(ptepage, true); @@ -4888,10 +4962,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, if (!page) goto out; } else if (!*pagep) { - ret = -ENOMEM; + /* If a page already exists, then it's UFFDIO_COPY for + * a non-missing case. Return -EEXIST. + */ + if (vm_shared && + hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) { + ret = -EEXIST; + goto out; + } + page = alloc_huge_page(dst_vma, dst_addr, 0); - if (IS_ERR(page)) + if (IS_ERR(page)) { + ret = -ENOMEM; goto out; + } ret = copy_huge_page_from_user(page, (const void __user *) src_addr, @@ -4995,6 +5079,7 @@ out_release_unlock: if (vm_shared || is_continue) unlock_page(page); out_release_nounlock: + restore_reserve_on_error(h, dst_vma, dst_addr, page); put_page(page); goto out; } @@ -5846,6 +5931,21 @@ unlock: return ret; } +int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +{ + int ret = 0; + + *hugetlb = false; + spin_lock_irq(&hugetlb_lock); + if (PageHeadHuge(page)) { + *hugetlb = true; + if (HPageFreed(page) || HPageMigratable(page)) + ret = get_page_unless_zero(page); + } + spin_unlock_irq(&hugetlb_lock); + return ret; +} + void putback_active_hugepage(struct page *page) { spin_lock_irq(&hugetlb_lock); diff --git a/mm/internal.h b/mm/internal.h index 54bd0dc2c23c..e8fdb531f887 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -96,26 +96,6 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } -/* - * When kernel touch the user page, the user page may be have been marked - * poison but still mapped in user space, if without this page, the kernel - * can guarantee the data integrity and operation success, the kernel is - * better to check the posion status and avoid touching it, be good not to - * panic, coredump for process fatal signal is a sample case matching this - * scenario. Or if kernel can't guarantee the data integrity, it's better - * not to call this function, let kernel touch the poison page and get to - * panic. - */ -static inline bool is_page_poisoned(struct page *page) -{ - if (PageHWPoison(page)) - return true; - else if (PageHuge(page) && PageHWPoison(compound_head(page))) - return true; - - return false; -} - extern unsigned long highest_memmap_pfn; /* @@ -404,27 +384,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /* - * At what user virtual address is page expected in @vma? + * At what user virtual address is page expected in vma? + * Returns -EFAULT if all of the page is outside the range of vma. + * If page is a compound head, the entire compound page is considered. */ static inline unsigned long -__vma_address(struct page *page, struct vm_area_struct *vma) +vma_address(struct page *page, struct vm_area_struct *vma) { - pgoff_t pgoff = page_to_pgoff(page); - return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page); + if (pgoff >= vma->vm_pgoff) { + address = vma->vm_start + + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) */ + if (address < vma->vm_start || address >= vma->vm_end) + address = -EFAULT; + } else if (PageHead(page) && + pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) { + /* Test above avoids possibility of wrap to 0 on 32-bit */ + address = vma->vm_start; + } else { + address = -EFAULT; + } + return address; } +/* + * Then at what user virtual address will none of the page be found in vma? + * Assumes that vma_address() already returned a good starting address. + * If page is a compound head, the entire compound page is considered. + */ static inline unsigned long -vma_address(struct page *page, struct vm_area_struct *vma) +vma_address_end(struct page *page, struct vm_area_struct *vma) { - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE; - - /* page should be within @vma mapping range */ - VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); - - return max(start, vma->vm_start); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page) + compound_nr(page); + address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) */ + if (address < vma->vm_start || address > vma->vm_end) + address = vma->vm_end; + return address; } static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, diff --git a/mm/ioremap.c b/mm/ioremap.c index d1dcc7e744ac..8ee0136f8cb0 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -16,16 +16,16 @@ #include "pgalloc-track.h" #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT; +static unsigned int __ro_after_init iomap_max_page_shift = BITS_PER_LONG - 1; static int __init set_nohugeiomap(char *str) { - iomap_max_page_shift = P4D_SHIFT; + iomap_max_page_shift = PAGE_SHIFT; return 0; } early_param("nohugeiomap", set_nohugeiomap); #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */ -static const bool iomap_max_page_shift = PAGE_SHIFT; +static const unsigned int iomap_max_page_shift = PAGE_SHIFT; #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ int ioremap_page_range(unsigned long addr, diff --git a/mm/kasan/init.c b/mm/kasan/init.c index c4605ac9837b..348f31d15a97 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -220,8 +220,8 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, /** * kasan_populate_early_shadow - populate shadow memory region with * kasan_early_shadow_page - * @shadow_start - start of the memory range to populate - * @shadow_end - end of the memory range to populate + * @shadow_start: start of the memory range to populate + * @shadow_end: end of the memory range to populate */ int __ref kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index e18fbbd5d9b4..4d21ac44d5d3 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -627,10 +627,10 @@ static void toggle_allocation_gate(struct work_struct *work) * During low activity with no allocations we might wait a * while; let's avoid the hung task warning. */ - wait_event_timeout(allocation_wait, atomic_read(&kfence_allocation_gate), - sysctl_hung_task_timeout_secs * HZ / 2); + wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate), + sysctl_hung_task_timeout_secs * HZ / 2); } else { - wait_event(allocation_wait, atomic_read(&kfence_allocation_gate)); + wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate)); } /* Disable static key and reset timer. */ @@ -776,11 +776,12 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) struct page *page; stable_node = rmap_item->head; - page = get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK); + page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK); if (!page) goto out; hlist_del(&rmap_item->hlist); + unlock_page(page); put_page(page); if (!hlist_empty(&stable_node->hlist)) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 85ad98c00fd9..0143d32bc666 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -949,6 +949,17 @@ static int page_action(struct page_state *ps, struct page *p, return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; } +/* + * Return true if a page type of a given page is supported by hwpoison + * mechanism (while handling could fail), otherwise false. This function + * does not return true for hugetlb or device memory pages, so it's assumed + * to be called only in the context where we never have such pages. + */ +static inline bool HWPoisonHandlable(struct page *page) +{ + return PageLRU(page) || __PageMovable(page); +} + /** * __get_hwpoison_page() - Get refcount for memory error handling: * @page: raw error page (hit by memory error) @@ -959,8 +970,22 @@ static int page_action(struct page_state *ps, struct page *p, static int __get_hwpoison_page(struct page *page) { struct page *head = compound_head(page); + int ret = 0; + bool hugetlb = false; + + ret = get_hwpoison_huge_page(head, &hugetlb); + if (hugetlb) + return ret; - if (!PageHuge(head) && PageTransHuge(head)) { + /* + * This check prevents from calling get_hwpoison_unless_zero() + * for any unsupported type of page in order to reduce the risk of + * unexpected races caused by taking a page refcount. + */ + if (!HWPoisonHandlable(head)) + return 0; + + if (PageTransHuge(head)) { /* * Non anonymous thp exists only in allocation/free time. We * can't handle such a case correctly, so let's give it up. @@ -1017,7 +1042,7 @@ try_again: ret = -EIO; } } else { - if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) { + if (PageHuge(p) || HWPoisonHandlable(p)) { ret = 1; } else { /* @@ -1527,7 +1552,12 @@ try_again: return 0; } - if (!PageTransTail(p) && !PageLRU(p)) + /* + * __munlock_pagevec may clear a writeback page's LRU flag without + * page_lock. We need wait writeback completion for this page or it + * may trigger vfs BUG while evict inode. + */ + if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p)) goto identify_page_state; /* diff --git a/mm/memory.c b/mm/memory.c index 730daa00952b..486f4a2874e7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ + } else if (details && details->single_page && + PageTransCompound(details->single_page) && + next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { + spinlock_t *ptl = pmd_lock(tlb->mm, pmd); + /* + * Take and drop THP pmd lock so that we cannot return + * prematurely, while zap_huge_pmd() has cleared *pmd, + * but not yet decremented compound_mapcount(). + */ + spin_unlock(ptl); } + /* * Here there can be other concurrent MADV_DONTNEED or * trans huge page faults running, and if the pmd is @@ -2939,6 +2950,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); + entry = pte_sw_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* @@ -3236,6 +3248,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root, } /** + * unmap_mapping_page() - Unmap single page from processes. + * @page: The locked page to be unmapped. + * + * Unmap this page from any userspace process which still has it mmaped. + * Typically, for efficiency, the range of nearby pages has already been + * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once + * truncation or invalidation holds the lock on a page, it may find that + * the page has been remapped again: and then uses unmap_mapping_page() + * to unmap it finally. + */ +void unmap_mapping_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct zap_details details = { }; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageTail(page)); + + details.check_mapping = mapping; + details.first_index = page->index; + details.last_index = page->index + thp_nr_pages(page) - 1; + details.single_page = page; + + i_mmap_lock_write(mapping); + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) + unmap_mapping_range_tree(&mapping->i_mmap, &details); + i_mmap_unlock_write(mapping); +} + +/** * unmap_mapping_pages() - Unmap pages from processes. * @mapping: The address space containing pages to be unmapped. * @start: Index of first page to be unmapped. @@ -3602,6 +3644,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); + entry = pte_sw_mkyoung(entry); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); @@ -3786,6 +3829,8 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) if (prefault && arch_wants_old_prefaulted_pte()) entry = pte_mkold(entry); + else + entry = pte_sw_mkyoung(entry); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); diff --git a/mm/migrate.c b/mm/migrate.c index b234c3f3acb7..41ff2c9896c4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -295,6 +295,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, goto out; page = migration_entry_to_page(entry); + page = compound_head(page); /* * Once page cache replacement of page migration started, page_count diff --git a/mm/page_alloc.c b/mm/page_alloc.c index aaa1655cf682..d1f5de1c1283 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -9158,6 +9158,8 @@ bool take_page_off_buddy(struct page *page) del_page_from_free_list(page_head, zone, page_order); break_down_buddy_pages(zone, page_head, page, 0, page_order, migratetype); + if (!is_migrate_isolate(migratetype)) + __mod_zone_freepage_state(zone, -1, migratetype); ret = true; break; } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 2cf01d933f13..e37bd43904af 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -212,23 +212,34 @@ restart: pvmw->ptl = NULL; } } else if (!pmd_present(pmde)) { + /* + * If PVMW_SYNC, take and drop THP pmd lock so that we + * cannot return prematurely, while zap_huge_pmd() has + * cleared *pmd but not decremented compound_mapcount(). + */ + if ((pvmw->flags & PVMW_SYNC) && + PageTransCompound(pvmw->page)) { + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + + spin_unlock(ptl); + } return false; } if (!map_pte(pvmw)) goto next_pte; while (1) { + unsigned long end; + if (check_pte(pvmw)) return true; next_pte: /* Seek to next pte only makes sense for THP */ if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) return not_found(pvmw); + end = vma_address_end(pvmw->page, pvmw->vma); do { pvmw->address += PAGE_SIZE; - if (pvmw->address >= pvmw->vma->vm_end || - pvmw->address >= - __vma_address(pvmw->page, pvmw->vma) + - thp_size(pvmw->page)) + if (pvmw->address >= end) return not_found(pvmw); /* Did we cross page table boundary? */ if (pvmw->address % PMD_SIZE == 0) { @@ -266,14 +277,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) .vma = vma, .flags = PVMW_SYNC, }; - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE; - if (unlikely(end < vma->vm_start || start >= vma->vm_end)) + pvmw.address = vma_address(page, vma); + if (pvmw.address == -EFAULT) return 0; - pvmw.address = max(start, vma->vm_start); if (!page_vma_mapped_walk(&pvmw)) return 0; page_vma_mapped_walk_done(&pvmw); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index c2210e1cdb51..4e640baf9794 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(!pmd_present(*pmdp)); - /* Below assumes pmd_present() is true */ - VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); + VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; diff --git a/mm/rmap.c b/mm/rmap.c index 693a610e181d..e05c300048e6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { - unsigned long address; if (PageAnon(page)) { struct anon_vma *page__anon_vma = page_anon_vma(page); /* @@ -717,15 +716,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) return -EFAULT; - } else if (page->mapping) { - if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) - return -EFAULT; - } else + } else if (!vma->vm_file) { return -EFAULT; - address = __vma_address(page, vma); - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { return -EFAULT; - return address; + } + + return vma_address(page, vma); } pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) @@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, */ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, vma, vma->vm_mm, address, - min(vma->vm_end, address + page_size(page))); + vma_address_end(page, vma)); mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { @@ -1405,6 +1402,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; + /* + * When racing against e.g. zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + if (flags & TTU_SYNC) + pvmw.flags = PVMW_SYNC; + /* munlock has nothing to gain from examining un-locked vmas */ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) return true; @@ -1426,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ + range.end = PageKsm(page) ? + address + PAGE_SIZE : vma_address_end(page, vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, - min(vma->vm_end, address + page_size(page))); + address, range.end); if (PageHuge(page)) { /* * If sharing is possible, start and end will be adjusted @@ -1777,7 +1784,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) else rmap_walk(page, &rwc); - return !page_mapcount(page) ? true : false; + /* + * When racing against e.g. zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + return !page_mapcount(page); } /** @@ -1874,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); + VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) @@ -1928,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, pgoff_start, pgoff_end) { unsigned long address = vma_address(page, vma); + VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) diff --git a/mm/shmem.c b/mm/shmem.c index a08cedefbfaa..5d46611cba8d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2258,25 +2258,11 @@ out_nomem: static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { struct shmem_inode_info *info = SHMEM_I(file_inode(file)); + int ret; - if (info->seals & F_SEAL_FUTURE_WRITE) { - /* - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * "future write" seal active. - */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) - return -EPERM; - - /* - * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as - * MAP_SHARED and read-only, take care to not allow mprotect to - * revert protections on such mappings. Do this only for shared - * mappings. For private mappings, don't need to mask - * VM_MAYWRITE as we still want them to be COW-writable. - */ - if (vma->vm_flags & VM_SHARED) - vma->vm_flags &= ~(VM_MAYWRITE); - } + ret = seal_check_future_write(info->seals, vma); + if (ret) + return ret; /* arm64 - allow memory tagging on RAM-based files */ vma->vm_flags |= VM_MTE_ALLOWED; @@ -2375,8 +2361,18 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pgoff_t offset, max_off; ret = -ENOMEM; - if (!shmem_inode_acct_block(inode, 1)) + if (!shmem_inode_acct_block(inode, 1)) { + /* + * We may have got a page, returned -ENOENT triggering a retry, + * and now we find ourselves with -ENOMEM. Release the page, to + * avoid a BUG_ON in our caller. + */ + if (unlikely(*pagep)) { + put_page(*pagep); + *pagep = NULL; + } goto out; + } if (!*pagep) { page = shmem_alloc_page(gfp, info, pgoff); diff --git a/mm/shuffle.h b/mm/shuffle.h index 71b784f0b7c3..cec62984f7d3 100644 --- a/mm/shuffle.h +++ b/mm/shuffle.h @@ -10,7 +10,7 @@ DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key); extern void __shuffle_free_memory(pg_data_t *pgdat); extern bool shuffle_pick_tail(void); -static inline void shuffle_free_memory(pg_data_t *pgdat) +static inline void __meminit shuffle_free_memory(pg_data_t *pgdat) { if (!static_branch_unlikely(&page_alloc_shuffle_key)) return; @@ -18,7 +18,7 @@ static inline void shuffle_free_memory(pg_data_t *pgdat) } extern void __shuffle_zone(struct zone *z); -static inline void shuffle_zone(struct zone *z) +static inline void __meminit shuffle_zone(struct zone *z) { if (!static_branch_unlikely(&page_alloc_shuffle_key)) return; diff --git a/mm/slab_common.c b/mm/slab_common.c index f8833d3e5d47..7cab77655f11 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size); #ifdef CONFIG_DEBUG_VM static int kmem_cache_sanity_check(const char *name, unsigned int size) { - if (!name || in_interrupt() || size < sizeof(void *) || - size > KMALLOC_MAX_SIZE) { + if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) { pr_err("kmem_cache_create(%s) integrity check failed\n", name); return -EINVAL; } @@ -318,6 +317,16 @@ kmem_cache_create_usercopy(const char *name, const char *cache_name; int err; +#ifdef CONFIG_SLUB_DEBUG + /* + * If no slub_debug was enabled globally, the static key is not yet + * enabled by setup_slub_debug(). Enable it if the cache is being + * created with any of the debugging flags passed explicitly. + */ + if (flags & SLAB_DEBUG_FLAGS) + static_branch_enable(&slub_debug_enabled); +#endif + mutex_lock(&slab_mutex); err = kmem_cache_sanity_check(name, size); diff --git a/mm/slub.c b/mm/slub.c index feda53ae62ba..61bd40e3eb9a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/bit_spinlock.h> #include <linux/interrupt.h> +#include <linux/swab.h> #include <linux/bitops.h> #include <linux/slab.h> #include "slab.h" @@ -301,6 +302,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) if (!debug_pagealloc_enabled_static()) return get_freepointer(s, object); + object = kasan_reset_tag(object); freepointer_addr = (unsigned long)object + s->offset; copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); return freelist_ptr(s, p, freepointer_addr); @@ -711,15 +713,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) p, p - addr, get_freepointer(s, p)); if (s->flags & SLAB_RED_ZONE) - print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, + print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, s->red_left_pad); else if (p > addr + 16) print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); - print_section(KERN_ERR, "Object ", p, + print_section(KERN_ERR, "Object ", p, min_t(unsigned int, s->object_size, PAGE_SIZE)); if (s->flags & SLAB_RED_ZONE) - print_section(KERN_ERR, "Redzone ", p + s->object_size, + print_section(KERN_ERR, "Redzone ", p + s->object_size, s->inuse - s->object_size); off = get_info_end(s); @@ -731,7 +733,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ - print_section(KERN_ERR, "Padding ", p + off, + print_section(KERN_ERR, "Padding ", p + off, size_from_object(s) - off); dump_stack(); @@ -908,11 +910,11 @@ static int check_object(struct kmem_cache *s, struct page *page, u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { - if (!check_bytes_and_report(s, page, object, "Redzone", + if (!check_bytes_and_report(s, page, object, "Left Redzone", object - s->red_left_pad, val, s->red_left_pad)) return 0; - if (!check_bytes_and_report(s, page, object, "Redzone", + if (!check_bytes_and_report(s, page, object, "Right Redzone", endobject, val, s->inuse - s->object_size)) return 0; } else { @@ -927,7 +929,7 @@ static int check_object(struct kmem_cache *s, struct page *page, if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && (!check_bytes_and_report(s, page, p, "Poison", p, POISON_FREE, s->object_size - 1) || - !check_bytes_and_report(s, page, p, "Poison", + !check_bytes_and_report(s, page, p, "End Poison", p + s->object_size - 1, POISON_END, 1))) return 0; /* @@ -3688,7 +3690,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) { slab_flags_t flags = s->flags; unsigned int size = s->object_size; - unsigned int freepointer_area; unsigned int order; /* @@ -3697,13 +3698,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * the possible location of the free pointer. */ size = ALIGN(size, sizeof(void *)); - /* - * This is the area of the object where a freepointer can be - * safely written. If redzoning adds more to the inuse size, we - * can't use that portion for writing the freepointer, so - * s->offset must be limited within this for the general case. - */ - freepointer_area = size; #ifdef CONFIG_SLUB_DEBUG /* @@ -3729,19 +3723,21 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) /* * With that we have determined the number of bytes in actual use - * by the object. This is the potential offset to the free pointer. + * by the object and redzoning. */ s->inuse = size; - if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || - s->ctor)) { + if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || + ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || + s->ctor) { /* * Relocate free pointer after the object if it is not * permitted to overwrite the first word of the object on * kmem_cache_free. * * This is the case if we do RCU, have a constructor or - * destructor or are poisoning the objects. + * destructor, are poisoning the objects, or are + * redzoning an object smaller than sizeof(void *). * * The assumption that s->offset >= s->inuse means free * pointer is outside of the object is used in the @@ -3750,13 +3746,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ s->offset = size; size += sizeof(void *); - } else if (freepointer_area > sizeof(void *)) { + } else { /* * Store freelist pointer near middle of object to keep * it away from the edges of the object to avoid small * sized over/underflows from neighboring allocations. */ - s->offset = ALIGN(freepointer_area / 2, sizeof(void *)); + s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *)); } #ifdef CONFIG_SLUB_DEBUG @@ -3828,15 +3824,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) { -#ifdef CONFIG_SLUB_DEBUG - /* - * If no slub_debug was enabled globally, the static key is not yet - * enabled by setup_slub_debug(). Enable it if the cache is being - * created with any of the debugging flags passed explicitly. - */ - if (flags & SLAB_DEBUG_FLAGS) - static_branch_enable(&slub_debug_enabled); -#endif s->flags = kmem_cache_flags(s->size, flags, s->name); #ifdef CONFIG_SLAB_FREELIST_HARDENED s->random = get_random_long(); diff --git a/mm/sparse.c b/mm/sparse.c index b2ada9dc00cb..55c18aff3e42 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -344,6 +344,15 @@ size_t mem_section_usage_size(void) return sizeof(struct mem_section_usage) + usemap_size(); } +static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat) +{ +#ifndef CONFIG_NEED_MULTIPLE_NODES + return __pa_symbol(pgdat); +#else + return __pa(pgdat); +#endif +} + #ifdef CONFIG_MEMORY_HOTREMOVE static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, @@ -362,7 +371,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * from the same section as the pgdat where possible to avoid * this problem. */ - goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); + goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: @@ -390,7 +399,7 @@ static void __init check_usemap_section_nr(int nid, } usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); - pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); + pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; diff --git a/mm/swapfile.c b/mm/swapfile.c index 149e77454e3c..996afa8131c8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1900,7 +1900,7 @@ unsigned int count_swap_pages(int type, int free) static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte) { - return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte); + return pte_same(pte_swp_clear_flags(pte), swp_pte); } /* diff --git a/mm/truncate.c b/mm/truncate.c index 95af244b112a..234ddd879caa 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -167,13 +167,10 @@ void do_invalidatepage(struct page *page, unsigned int offset, * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ -static void -truncate_cleanup_page(struct address_space *mapping, struct page *page) +static void truncate_cleanup_page(struct page *page) { - if (page_mapped(page)) { - unsigned int nr = thp_nr_pages(page); - unmap_mapping_pages(mapping, page->index, nr, false); - } + if (page_mapped(page)) + unmap_mapping_page(page); if (page_has_private(page)) do_invalidatepage(page, 0, thp_size(page)); @@ -218,7 +215,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page) if (page->mapping != mapping) return -EIO; - truncate_cleanup_page(mapping, page); + truncate_cleanup_page(page); delete_from_page_cache(page); return 0; } @@ -325,7 +322,7 @@ void truncate_inode_pages_range(struct address_space *mapping, index = indices[pagevec_count(&pvec) - 1] + 1; truncate_exceptional_pvec_entries(mapping, &pvec, indices); for (i = 0; i < pagevec_count(&pvec); i++) - truncate_cleanup_page(mapping, pvec.pages[i]); + truncate_cleanup_page(pvec.pages[i]); delete_from_page_cache_batch(mapping, &pvec); for (i = 0; i < pagevec_count(&pvec); i++) unlock_page(pvec.pages[i]); @@ -639,6 +636,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; } + if (!did_range_unmap && page_mapped(page)) { + /* + * If page is mapped, before taking its lock, + * zap the rest of the file in one hit. + */ + unmap_mapping_pages(mapping, index, + (1 + end - index), false); + did_range_unmap = 1; + } + lock_page(page); WARN_ON(page_to_index(page) != index); if (page->mapping != mapping) { @@ -646,23 +653,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; } wait_on_page_writeback(page); - if (page_mapped(page)) { - if (!did_range_unmap) { - /* - * Zap the rest of the file in one hit. - */ - unmap_mapping_pages(mapping, index, - (1 + end - index), false); - did_range_unmap = 1; - } else { - /* - * Just zap this page - */ - unmap_mapping_pages(mapping, index, - 1, false); - } - } + + if (page_mapped(page)) + unmap_mapping_page(page); BUG_ON(page_mapped(page)); + ret2 = do_launder_page(mapping, page); if (ret2 == 0) { if (!invalidate_complete_page2(mapping, page)) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e14b3820c6a8..63a73e164d55 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -360,38 +360,38 @@ out: * If a reservation for the page existed in the reservation * map of a private mapping, the map was modified to indicate * the reservation was consumed when the page was allocated. - * We clear the PagePrivate flag now so that the global + * We clear the HPageRestoreReserve flag now so that the global * reserve count will not be incremented in free_huge_page. * The reservation map will still indicate the reservation * was consumed and possibly prevent later page allocation. * This is better than leaking a global reservation. If no - * reservation existed, it is still safe to clear PagePrivate - * as no adjustments to reservation counts were made during - * allocation. + * reservation existed, it is still safe to clear + * HPageRestoreReserve as no adjustments to reservation counts + * were made during allocation. * * The reservation map for shared mappings indicates which * pages have reservations. When a huge page is allocated * for an address with a reservation, no change is made to - * the reserve map. In this case PagePrivate will be set - * to indicate that the global reservation count should be + * the reserve map. In this case HPageRestoreReserve will be + * set to indicate that the global reservation count should be * incremented when the page is freed. This is the desired * behavior. However, when a huge page is allocated for an * address without a reservation a reservation entry is added - * to the reservation map, and PagePrivate will not be set. - * When the page is freed, the global reserve count will NOT - * be incremented and it will appear as though we have leaked - * reserved page. In this case, set PagePrivate so that the - * global reserve count will be incremented to match the - * reservation map entry which was created. + * to the reservation map, and HPageRestoreReserve will not be + * set. When the page is freed, the global reserve count will + * NOT be incremented and it will appear as though we have + * leaked reserved page. In this case, set HPageRestoreReserve + * so that the global reserve count will be incremented to + * match the reservation map entry which was created. * * Note that vm_alloc_shared is based on the flags of the vma * for which the page was originally allocated. dst_vma could * be different or NULL on error. */ if (vm_alloc_shared) - SetPagePrivate(page); + SetHPageRestoreReserve(page); else - ClearPagePrivate(page); + ClearHPageRestoreReserve(page); put_page(page); } BUG_ON(copied < 0); |