diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 115 |
1 files changed, 78 insertions, 37 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 1d6b3a369077..382c3d06bfb1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -168,7 +168,7 @@ static void filemap_unaccount_folio(struct address_space *mapping, add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); if (mapping_exiting(mapping) && !folio_test_large(folio)) { - int mapcount = page_mapcount(&folio->page); + int mapcount = folio_mapcount(folio); if (folio_ref_count(folio) >= mapcount + 2) { /* @@ -852,23 +852,18 @@ noinline int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) { XA_STATE(xas, &mapping->i_pages, index); - bool huge = folio_test_hugetlb(folio); - bool charged = false; - long nr = 1; + void *alloced_shadow = NULL; + int alloced_order = 0; + bool huge; + long nr; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); mapping_set_update(&xas, mapping); - if (!huge) { - int error = mem_cgroup_charge(folio, NULL, gfp); - if (error) - return error; - charged = true; - } - VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio); xas_set_order(&xas, index, folio_order(folio)); + huge = folio_test_hugetlb(folio); nr = folio_nr_pages(folio); gfp &= GFP_RECLAIM_MASK; @@ -876,13 +871,10 @@ noinline int __filemap_add_folio(struct address_space *mapping, folio->mapping = mapping; folio->index = xas.xa_index; - do { - unsigned int order = xa_get_order(xas.xa, xas.xa_index); + for (;;) { + int order = -1, split_order = 0; void *entry, *old = NULL; - if (order > folio_order(folio)) - xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), - order, gfp); xas_lock_irq(&xas); xas_for_each_conflict(&xas, entry) { old = entry; @@ -890,19 +882,33 @@ noinline int __filemap_add_folio(struct address_space *mapping, xas_set_err(&xas, -EEXIST); goto unlock; } + /* + * If a larger entry exists, + * it will be the first and only entry iterated. + */ + if (order == -1) + order = xas_get_order(&xas); + } + + /* entry may have changed before we re-acquire the lock */ + if (alloced_order && (old != alloced_shadow || order != alloced_order)) { + xas_destroy(&xas); + alloced_order = 0; } if (old) { - if (shadowp) - *shadowp = old; - /* entry may have been split before we acquired lock */ - order = xa_get_order(xas.xa, xas.xa_index); - if (order > folio_order(folio)) { + if (order > 0 && order > folio_order(folio)) { /* How to handle large swap entries? */ BUG_ON(shmem_mapping(mapping)); + if (!alloced_order) { + split_order = order; + goto unlock; + } xas_split(&xas, old, order); xas_reset(&xas); } + if (shadowp) + *shadowp = old; } xas_store(&xas, folio); @@ -918,9 +924,24 @@ noinline int __filemap_add_folio(struct address_space *mapping, __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); } + unlock: xas_unlock_irq(&xas); - } while (xas_nomem(&xas, gfp)); + + /* split needed, alloc here and retry. */ + if (split_order) { + xas_split_alloc(&xas, old, split_order, gfp); + if (xas_error(&xas)) + goto error; + alloced_shadow = old; + alloced_order = split_order; + xas_reset(&xas); + continue; + } + + if (!xas_nomem(&xas, gfp)) + break; + } if (xas_error(&xas)) goto error; @@ -928,8 +949,6 @@ unlock: trace_mm_filemap_add_to_page_cache(folio); return 0; error: - if (charged) - mem_cgroup_uncharge(folio); folio->mapping = NULL; /* Leave page->index set: truncation relies upon it */ folio_put_refs(folio, nr); @@ -943,11 +962,16 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, void *shadow = NULL; int ret; + ret = mem_cgroup_charge(folio, NULL, gfp); + if (ret) + return ret; + __folio_set_locked(folio); ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow); - if (unlikely(ret)) + if (unlikely(ret)) { + mem_cgroup_uncharge(folio); __folio_clear_locked(folio); - else { + } else { /* * The folio might have been evicted from cache only * recently, in which case it should be activated like @@ -966,7 +990,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, EXPORT_SYMBOL_GPL(filemap_add_folio); #ifdef CONFIG_NUMA -struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order) { int n; struct folio *folio; @@ -981,9 +1005,9 @@ struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) return folio; } - return folio_alloc(gfp, order); + return folio_alloc_noprof(gfp, order); } -EXPORT_SYMBOL(filemap_alloc_folio); +EXPORT_SYMBOL(filemap_alloc_folio_noprof); #endif /* @@ -1786,7 +1810,7 @@ EXPORT_SYMBOL(page_cache_prev_miss); * C. Return the page to the page allocator * * This means that any page may have its reference count temporarily - * increased by a speculative page cache (or fast GUP) lookup as it can + * increased by a speculative page cache (or GUP-fast) lookup as it can * be allocated by another user before the RCU grace period expires. * Because the refcount temporarily acquired here may end up being the * last refcount on the page, any page allocation must be freeable by @@ -3481,7 +3505,7 @@ skip: static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, struct folio *folio, unsigned long start, unsigned long addr, unsigned int nr_pages, - unsigned int *mmap_miss) + unsigned long *rss, unsigned int *mmap_miss) { vm_fault_t ret = 0; struct page *page = folio_page(folio, start); @@ -3492,7 +3516,15 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, if (PageHWPoison(page + count)) goto skip; - (*mmap_miss)++; + /* + * If there are too many folios that are recently evicted + * in a file, they will probably continue to be evicted. + * In such situation, read-ahead is only a waste of IO. + * Don't decrease mmap_miss in this scenario to make sure + * we can stop read-ahead. + */ + if (!folio_test_workingset(folio)) + (*mmap_miss)++; /* * NOTE: If there're PTE markers, we'll leave them to be @@ -3507,6 +3539,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, skip: if (count) { set_pte_range(vmf, folio, page, count, addr); + *rss += count; folio_ref_add(folio, count); if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; @@ -3521,6 +3554,7 @@ skip: if (count) { set_pte_range(vmf, folio, page, count, addr); + *rss += count; folio_ref_add(folio, count); if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; @@ -3533,7 +3567,7 @@ skip: static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf, struct folio *folio, unsigned long addr, - unsigned int *mmap_miss) + unsigned long *rss, unsigned int *mmap_miss) { vm_fault_t ret = 0; struct page *page = &folio->page; @@ -3541,7 +3575,9 @@ static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf, if (PageHWPoison(page)) return ret; - (*mmap_miss)++; + /* See comment of filemap_map_folio_range() */ + if (!folio_test_workingset(folio)) + (*mmap_miss)++; /* * NOTE: If there're PTE markers, we'll leave them to be @@ -3555,6 +3591,7 @@ static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf, ret = VM_FAULT_NOPAGE; set_pte_range(vmf, folio, page, 1, addr); + (*rss)++; folio_ref_inc(folio); return ret; @@ -3571,7 +3608,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, XA_STATE(xas, &mapping->i_pages, start_pgoff); struct folio *folio; vm_fault_t ret = 0; - unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved; + unsigned long rss = 0; + unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved, folio_type; rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); @@ -3590,6 +3628,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, folio_put(folio); goto out; } + + folio_type = mm_counter_file(folio); do { unsigned long end; @@ -3601,15 +3641,16 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, if (!folio_test_large(folio)) ret |= filemap_map_order0_folio(vmf, - folio, addr, &mmap_miss); + folio, addr, &rss, &mmap_miss); else ret |= filemap_map_folio_range(vmf, folio, xas.xa_index - folio->index, addr, - nr_pages, &mmap_miss); + nr_pages, &rss, &mmap_miss); folio_unlock(folio); folio_put(folio); } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL); + add_mm_counter(vma->vm_mm, folio_type, rss); pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); |