summaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c129
1 files changed, 92 insertions, 37 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 6666bc20b3c6..be44d0b36b18 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3287,19 +3287,35 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
if (PageAnon(vmf->page)) {
struct page *page = vmf->page;
- /* PageKsm() doesn't necessarily raise the page refcount */
- if (PageKsm(page) || page_count(page) != 1)
+ /*
+ * We have to verify under page lock: these early checks are
+ * just an optimization to avoid locking the page and freeing
+ * the swapcache if there is little hope that we can reuse.
+ *
+ * PageKsm() doesn't necessarily raise the page refcount.
+ */
+ if (PageKsm(page) || page_count(page) > 3)
+ goto copy;
+ if (!PageLRU(page))
+ /*
+ * Note: We cannot easily detect+handle references from
+ * remote LRU pagevecs or references to PageLRU() pages.
+ */
+ lru_add_drain();
+ if (page_count(page) > 1 + PageSwapCache(page))
goto copy;
if (!trylock_page(page))
goto copy;
- if (PageKsm(page) || page_mapcount(page) != 1 || page_count(page) != 1) {
+ if (PageSwapCache(page))
+ try_to_free_swap(page);
+ if (PageKsm(page) || page_count(page) != 1) {
unlock_page(page);
goto copy;
}
/*
- * Ok, we've got the only map reference, and the only
- * page count reference, and the page is locked,
- * it's dark out, and we're wearing sunglasses. Hit it.
+ * Ok, we've got the only page reference from our mapping
+ * and the page is locked, it's dark out, and we're wearing
+ * sunglasses. Hit it.
*/
unlock_page(page);
wp_page_reuse(vmf);
@@ -3372,11 +3388,11 @@ void unmap_mapping_folio(struct folio *folio)
details.even_cows = false;
details.single_folio = folio;
- i_mmap_lock_write(mapping);
+ i_mmap_lock_read(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
last_index, &details);
- i_mmap_unlock_write(mapping);
+ i_mmap_unlock_read(mapping);
}
/**
@@ -3402,11 +3418,11 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
if (last_index < first_index)
last_index = ULONG_MAX;
- i_mmap_lock_write(mapping);
+ i_mmap_lock_read(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
last_index, &details);
- i_mmap_unlock_write(mapping);
+ i_mmap_unlock_read(mapping);
}
EXPORT_SYMBOL_GPL(unmap_mapping_pages);
@@ -3473,6 +3489,25 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
return 0;
}
+static inline bool should_try_to_free_swap(struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int fault_flags)
+{
+ if (!PageSwapCache(page))
+ return false;
+ if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) ||
+ PageMlocked(page))
+ return true;
+ /*
+ * If we want to map a page that's in the swapcache writable, we
+ * have to detect via the refcount if we're really the exclusive
+ * user. Try freeing the swapcache to get rid of the swapcache
+ * reference only in case it's likely that we'll be the exlusive user.
+ */
+ return (fault_flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+ page_count(page) == 2;
+}
+
/*
* We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
@@ -3591,21 +3626,39 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
goto out_release;
}
- /*
- * Make sure try_to_free_swap or reuse_swap_page or swapoff did not
- * release the swapcache from under us. The page pin, and pte_same
- * test below, are not enough to exclude that. Even if it is still
- * swapcache, we need to check that the page's swap has not changed.
- */
- if (unlikely((!PageSwapCache(page) ||
- page_private(page) != entry.val)) && swapcache)
- goto out_page;
-
- page = ksm_might_need_to_copy(page, vma, vmf->address);
- if (unlikely(!page)) {
- ret = VM_FAULT_OOM;
- page = swapcache;
- goto out_page;
+ if (swapcache) {
+ /*
+ * Make sure try_to_free_swap or swapoff did not release the
+ * swapcache from under us. The page pin, and pte_same test
+ * below, are not enough to exclude that. Even if it is still
+ * swapcache, we need to check that the page's swap has not
+ * changed.
+ */
+ if (unlikely(!PageSwapCache(page) ||
+ page_private(page) != entry.val))
+ goto out_page;
+
+ /*
+ * KSM sometimes has to copy on read faults, for example, if
+ * page->index of !PageKSM() pages would be nonlinear inside the
+ * anon VMA -- PageKSM() is lost on actual swapout.
+ */
+ page = ksm_might_need_to_copy(page, vma, vmf->address);
+ if (unlikely(!page)) {
+ ret = VM_FAULT_OOM;
+ page = swapcache;
+ goto out_page;
+ }
+
+ /*
+ * If we want to map a page that's in the swapcache writable, we
+ * have to detect via the refcount if we're really the exclusive
+ * owner. Try removing the extra reference from the local LRU
+ * pagevecs if required.
+ */
+ if ((vmf->flags & FAULT_FLAG_WRITE) && page == swapcache &&
+ !PageKsm(page) && !PageLRU(page))
+ lru_add_drain();
}
cgroup_throttle_swaprate(page, GFP_KERNEL);
@@ -3624,19 +3677,25 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
/*
- * The page isn't present yet, go ahead with the fault.
- *
- * Be careful about the sequence of operations here.
- * To get its accounting right, reuse_swap_page() must be called
- * while the page is counted on swap but not yet in mapcount i.e.
- * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
- * must be called after the swap_free(), or it will never succeed.
+ * Remove the swap entry and conditionally try to free up the swapcache.
+ * We're already holding a reference on the page but haven't mapped it
+ * yet.
*/
+ swap_free(entry);
+ if (should_try_to_free_swap(page, vma, vmf->flags))
+ try_to_free_swap(page);
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
- if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+
+ /*
+ * Same logic as in do_wp_page(); however, optimize for fresh pages
+ * that are certainly not shared because we just allocated them without
+ * exposing them to the swapcache.
+ */
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+ (page != swapcache || page_count(page) == 1)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
@@ -3662,10 +3721,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
- swap_free(entry);
- if (mem_cgroup_swap_full(page) ||
- (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
- try_to_free_swap(page);
unlock_page(page);
if (page != swapcache && swapcache) {
/*