diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 98 |
1 files changed, 68 insertions, 30 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 49f40730e711..5abffe6f8389 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -348,29 +348,27 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, * conditional to this static branch, we'll have to allow modules that does * kmem_cache_alloc and the such to see this symbol as well */ -DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key); -EXPORT_SYMBOL(memcg_kmem_enabled_key); +DEFINE_STATIC_KEY_FALSE(memcg_kmem_online_key); +EXPORT_SYMBOL(memcg_kmem_online_key); DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key); EXPORT_SYMBOL(memcg_bpf_enabled_key); #endif /** - * mem_cgroup_css_from_page - css of the memcg associated with a page - * @page: page of interest + * mem_cgroup_css_from_folio - css of the memcg associated with a folio + * @folio: folio of interest * * If memcg is bound to the default hierarchy, css of the memcg associated - * with @page is returned. The returned css remains associated with @page + * with @folio is returned. The returned css remains associated with @folio * until it is released. * * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup * is returned. */ -struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page) +struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio) { - struct mem_cgroup *memcg; - - memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) memcg = root_mem_cgroup; @@ -483,6 +481,12 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; + if (lru_gen_enabled()) { + if (soft_limit_excess(memcg)) + lru_gen_soft_reclaim(&memcg->nodeinfo[nid]->lruvec); + return; + } + mctz = soft_limit_tree.rb_tree_per_node[nid]; if (!mctz) return; @@ -2945,13 +2949,13 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) } /* - * page_memcg_check() is used here, because in theory we can encounter + * folio_memcg_check() is used here, because in theory we can encounter * a folio where the slab flag has been cleared already, but * slab->memcg_data has not been freed yet - * page_memcg_check(page) will guarantee that a proper memory + * folio_memcg_check() will guarantee that a proper memory * cgroup pointer or NULL will be returned. */ - return page_memcg_check(folio_page(folio, 0)); + return folio_memcg_check(folio); } /* @@ -3036,7 +3040,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) { struct obj_cgroup *objcg; - if (!memcg_kmem_enabled()) + if (!memcg_kmem_online()) return NULL; if (PageMemcgKmem(page)) { @@ -3532,6 +3536,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, struct mem_cgroup_tree_per_node *mctz; unsigned long excess; + if (lru_gen_enabled()) + return 0; + if (order > 0) return 0; @@ -3745,7 +3752,7 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) objcg->memcg = memcg; rcu_assign_pointer(memcg->objcg, objcg); - static_branch_enable(&memcg_kmem_enabled_key); + static_branch_enable(&memcg_kmem_online_key); memcg->kmemcg_id = memcg->id.id; @@ -3920,6 +3927,10 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, { struct mem_cgroup *memcg = mem_cgroup_from_css(css); + pr_warn_once("Cgroup memory moving (move_charge_at_immigrate) is deprecated. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); + if (val & ~MOVE_MASK) return -EINVAL; @@ -5393,6 +5404,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) if (unlikely(mem_cgroup_is_root(memcg))) queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ); + lru_gen_online_memcg(memcg); return 0; offline_kmem: memcg_offline_kmem(memcg); @@ -5424,6 +5436,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) memcg_offline_kmem(memcg); reparent_shrinker_deferred(memcg); wb_memcg_offline(memcg); + lru_gen_offline_memcg(memcg); drain_all_stock(memcg); @@ -5435,6 +5448,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css) struct mem_cgroup *memcg = mem_cgroup_from_css(css); invalidate_reclaim_iterators(memcg); + lru_gen_release_memcg(memcg); } static void mem_cgroup_css_free(struct cgroup_subsys_state *css) @@ -5703,7 +5717,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, * @from: mem_cgroup which the page is moved from. * @to: mem_cgroup which the page is moved to. @from != @to. * - * The caller must make sure the page is not on LRU (isolate_page() is useful.) + * The page must be locked and not on the LRU. * * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" * from old cgroup. @@ -5720,20 +5734,13 @@ static int mem_cgroup_move_account(struct page *page, int nid, ret; VM_BUG_ON(from == to); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); VM_BUG_ON(compound && !folio_test_large(folio)); - /* - * Prevent mem_cgroup_migrate() from looking at - * page's memory cgroup of its source page while we change it. - */ - ret = -EBUSY; - if (!folio_trylock(folio)) - goto out; - ret = -EINVAL; if (folio_memcg(folio) != from) - goto out_unlock; + goto out; pgdat = folio_pgdat(folio); from_vec = mem_cgroup_lruvec(from, pgdat); @@ -5820,8 +5827,6 @@ static int mem_cgroup_move_account(struct page *page, mem_cgroup_charge_statistics(from, -nr_pages); memcg_check_events(from, nid); local_irq_enable(); -out_unlock: - folio_unlock(folio); out: return ret; } @@ -5870,6 +5875,29 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, else if (is_swap_pte(ptent)) page = mc_handle_swap_pte(vma, ptent, &ent); + if (target && page) { + if (!trylock_page(page)) { + put_page(page); + return ret; + } + /* + * page_mapped() must be stable during the move. This + * pte is locked, so if it's present, the page cannot + * become unmapped. If it isn't, we have only partial + * control over the mapped state: the page lock will + * prevent new faults against pagecache and swapcache, + * so an unmapped page cannot become mapped. However, + * if the page is already mapped elsewhere, it can + * unmap, and there is nothing we can do about it. + * Alas, skip moving the page in this case. + */ + if (!pte_present(ptent) && page_mapped(page)) { + unlock_page(page); + put_page(page); + return ret; + } + } + if (!page && !ent.val) return ret; if (page) { @@ -5886,8 +5914,11 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, if (target) target->page = page; } - if (!ret || !target) + if (!ret || !target) { + if (target) + unlock_page(page); put_page(page); + } } /* * There is a swap entry and a page doesn't exist or isn't charged. @@ -5927,6 +5958,10 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, ret = MC_TARGET_PAGE; if (target) { get_page(page); + if (!trylock_page(page)) { + put_page(page); + return MC_TARGET_NONE; + } target->page = page; } } @@ -6157,7 +6192,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, target_type = get_mctgt_type_thp(vma, addr, *pmd, &target); if (target_type == MC_TARGET_PAGE) { page = target.page; - if (!isolate_lru_page(page)) { + if (isolate_lru_page(page)) { if (!mem_cgroup_move_account(page, true, mc.from, mc.to)) { mc.precharge -= HPAGE_PMD_NR; @@ -6165,6 +6200,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, } putback_lru_page(page); } + unlock_page(page); put_page(page); } else if (target_type == MC_TARGET_DEVICE) { page = target.page; @@ -6173,6 +6209,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, mc.precharge -= HPAGE_PMD_NR; mc.moved_charge += HPAGE_PMD_NR; } + unlock_page(page); put_page(page); } spin_unlock(ptl); @@ -6205,7 +6242,7 @@ retry: */ if (PageTransCompound(page)) goto put; - if (!device && isolate_lru_page(page)) + if (!device && !isolate_lru_page(page)) goto put; if (!mem_cgroup_move_account(page, false, mc.from, mc.to)) { @@ -6215,7 +6252,8 @@ retry: } if (!device) putback_lru_page(page); -put: /* get_mctgt_type() gets the page */ +put: /* get_mctgt_type() gets & locks the page */ + unlock_page(page); put_page(page); break; case MC_TARGET_SWAP: |