Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c | 129
1 file changed, 92 insertions(+), 37 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 6666bc20b3c6..be44d0b36b18 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3287,19 +3287,35 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	if (PageAnon(vmf->page)) {
 		struct page *page = vmf->page;
 
-		/* PageKsm() doesn't necessarily raise the page refcount */
-		if (PageKsm(page) || page_count(page) != 1)
+		/*
+		 * We have to verify under page lock: these early checks are
+		 * just an optimization to avoid locking the page and freeing
+		 * the swapcache if there is little hope that we can reuse.
+		 *
+		 * PageKsm() doesn't necessarily raise the page refcount.
+		 */
+		if (PageKsm(page) || page_count(page) > 3)
+			goto copy;
+		if (!PageLRU(page))
+			/*
+			 * Note: We cannot easily detect+handle references from
+			 * remote LRU pagevecs or references to PageLRU() pages.
+			 */
+			lru_add_drain();
+		if (page_count(page) > 1 + PageSwapCache(page))
 			goto copy;
 		if (!trylock_page(page))
 			goto copy;
-		if (PageKsm(page) || page_mapcount(page) != 1 || page_count(page) != 1) {
+		if (PageSwapCache(page))
+			try_to_free_swap(page);
+		if (PageKsm(page) || page_count(page) != 1) {
 			unlock_page(page);
 			goto copy;
 		}
 		/*
-		 * Ok, we've got the only map reference, and the only
-		 * page count reference, and the page is locked,
-		 * it's dark out, and we're wearing sunglasses. Hit it.
+		 * Ok, we've got the only page reference from our mapping
+		 * and the page is locked, it's dark out, and we're wearing
+		 * sunglasses. Hit it.
 		 */
 		unlock_page(page);
 		wp_page_reuse(vmf);
@@ -3372,11 +3388,11 @@ void unmap_mapping_folio(struct folio *folio)
 	details.even_cows = false;
 	details.single_folio = folio;
 
-	i_mmap_lock_write(mapping);
+	i_mmap_lock_read(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
 		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
 					 last_index, &details);
-	i_mmap_unlock_write(mapping);
+	i_mmap_unlock_read(mapping);
 }
 
 /**
@@ -3402,11 +3418,11 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 	if (last_index < first_index)
 		last_index = ULONG_MAX;
 
-	i_mmap_lock_write(mapping);
+	i_mmap_lock_read(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
 		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
 					 last_index, &details);
-	i_mmap_unlock_write(mapping);
+	i_mmap_unlock_read(mapping);
 }
 EXPORT_SYMBOL_GPL(unmap_mapping_pages);
 
@@ -3473,6 +3489,25 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
 	return 0;
 }
 
+static inline bool should_try_to_free_swap(struct page *page,
+					   struct vm_area_struct *vma,
+					   unsigned int fault_flags)
+{
+	if (!PageSwapCache(page))
+		return false;
+	if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) ||
+	    PageMlocked(page))
+		return true;
+	/*
+	 * If we want to map a page that's in the swapcache writable, we
+	 * have to detect via the refcount if we're really the exclusive
+	 * user. Try freeing the swapcache to get rid of the swapcache
+	 * reference only in case it's likely that we'll be the exclusive user.
+	 */
+	return (fault_flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+		page_count(page) == 2;
+}
+
 /*
  * We enter with non-exclusive mmap_lock (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -3591,21 +3626,39 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		goto out_release;
 	}
 
-	/*
-	 * Make sure try_to_free_swap or reuse_swap_page or swapoff did not
-	 * release the swapcache from under us.  The page pin, and pte_same
-	 * test below, are not enough to exclude that.  Even if it is still
-	 * swapcache, we need to check that the page's swap has not changed.
-	 */
-	if (unlikely((!PageSwapCache(page) ||
-			page_private(page) != entry.val)) && swapcache)
-		goto out_page;
-
-	page = ksm_might_need_to_copy(page, vma, vmf->address);
-	if (unlikely(!page)) {
-		ret = VM_FAULT_OOM;
-		page = swapcache;
-		goto out_page;
+	if (swapcache) {
+		/*
+		 * Make sure try_to_free_swap or swapoff did not release the
+		 * swapcache from under us.  The page pin, and pte_same test
+		 * below, are not enough to exclude that.  Even if it is still
+		 * swapcache, we need to check that the page's swap has not
+		 * changed.
+		 */
+		if (unlikely(!PageSwapCache(page) ||
+			     page_private(page) != entry.val))
+			goto out_page;
+
+		/*
+		 * KSM sometimes has to copy on read faults, for example, if
+		 * page->index of !PageKSM() pages would be nonlinear inside the
+		 * anon VMA -- PageKSM() is lost on actual swapout.
+		 */
+		page = ksm_might_need_to_copy(page, vma, vmf->address);
+		if (unlikely(!page)) {
+			ret = VM_FAULT_OOM;
+			page = swapcache;
+			goto out_page;
+		}
+
+		/*
+		 * If we want to map a page that's in the swapcache writable, we
+		 * have to detect via the refcount if we're really the exclusive
+		 * owner. Try removing the extra reference from the local LRU
+		 * pagevecs if required.
+		 */
+		if ((vmf->flags & FAULT_FLAG_WRITE) && page == swapcache &&
+		    !PageKsm(page) && !PageLRU(page))
+			lru_add_drain();
 	}
 
 	cgroup_throttle_swaprate(page, GFP_KERNEL);
@@ -3624,19 +3677,25 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 
 	}
 
 	/*
-	 * The page isn't present yet, go ahead with the fault.
-	 *
-	 * Be careful about the sequence of operations here.
-	 * To get its accounting right, reuse_swap_page() must be called
-	 * while the page is counted on swap but not yet in mapcount i.e.
-	 * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
-	 * must be called after the swap_free(), or it will never succeed.
+	 * Remove the swap entry and conditionally try to free up the swapcache.
+	 * We're already holding a reference on the page but haven't mapped it
+	 * yet.
 	 */
+	swap_free(entry);
+	if (should_try_to_free_swap(page, vma, vmf->flags))
+		try_to_free_swap(page);
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+
+	/*
+	 * Same logic as in do_wp_page(); however, optimize for fresh pages
+	 * that are certainly not shared because we just allocated them without
+	 * exposing them to the swapcache.
+	 */
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+	    (page != swapcache || page_count(page) == 1)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
@@ -3662,10 +3721,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 	arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 
-	swap_free(entry);
-	if (mem_cgroup_swap_full(page) ||
-	    (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
-		try_to_free_swap(page);
 	unlock_page(page);
 	if (page != swapcache && swapcache) {
 		/*
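The common thread in the do_wp_page() and do_swap_page() hunks is a refcount-based
exclusivity test that replaces reuse_swap_page(). As a reading aid, the sketch below
condenses that test into a single hypothetical helper; can_reuse_anon_page() is not
part of the patch, and the body only rearranges kernel helpers that appear verbatim
in the diff above.

/*
 * Illustrative sketch, not part of the patch: the reuse test that the
 * do_wp_page() hunk above establishes.  A write fault may reuse an
 * anonymous page in place (instead of copying) only if our page table
 * mapping holds the sole reference once LRU pagevec and swapcache
 * references are out of the way.
 */
static bool can_reuse_anon_page(struct page *page)
{
	/* More than mapping + swapcache + one pagevec reference: hopeless. */
	if (PageKsm(page) || page_count(page) > 3)
		return false;
	if (!PageLRU(page))
		/* The page may still sit in this CPU's LRU pagevec, pinning it. */
		lru_add_drain();
	/* One reference from our mapping, plus one from the swapcache if any. */
	if (page_count(page) > 1 + PageSwapCache(page))
		return false;
	/* The decisive check must happen under the page lock. */
	if (!trylock_page(page))
		return false;
	if (PageSwapCache(page))
		try_to_free_swap(page);		/* drop the swapcache reference */
	if (PageKsm(page) || page_count(page) != 1) {
		unlock_page(page);
		return false;
	}
	unlock_page(page);
	return true;
}

do_swap_page() applies the same idea with a shortcut: a freshly allocated swapin copy
(page != swapcache) was never exposed to the swapcache or LRU and is certainly
exclusive, while for the swapcache page itself exclusivity is proven by
page_count(page) == 1 after the earlier lru_add_drain() and try_to_free_swap() calls
have dropped the extra references. That is exactly the
(page != swapcache || page_count(page) == 1) condition in the last do_swap_page() hunk.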