From 3e4fb13ac34bad94cf390415b1e6bc3ca9520d65 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 2 Mar 2023 19:58:35 +0800 Subject: mm: swap: remove unneeded cgroup_throttle_swaprate() All the callers of cgroup_throttle_swaprate() are converted to folio_throttle_swaprate(), so make __cgroup_throttle_swaprate() to take a folio, and rename it to __folio_throttle_swaprate(), also rename gfp_mask to gfp and drop redundant extern keyword. finally, drop unused cgroup_throttle_swaprate(). Link: https://lkml.kernel.org/r/20230302115835.105364-8-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/swap.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 209a425739a9..d5d0b54e90e8 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -641,22 +641,18 @@ extern atomic_t zswap_stored_pages; #endif #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -extern void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); -static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) +void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp); +static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { if (mem_cgroup_disabled()) return; - __cgroup_throttle_swaprate(page, gfp_mask); + __folio_throttle_swaprate(folio, gfp); } #else -static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) -{ -} -#endif static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { - cgroup_throttle_swaprate(&folio->page, gfp); } +#endif #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); -- cgit v1.2.3-70-g09d2 From 82b3aa2681ca92421c4b508e7c390000273c53fe Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Mon, 6 Mar 2023 23:41:36 +0800 Subject: mm, memcg: Prevent memory.swappiness load/store tearing The knob for cgroup v1 memory controller: memory.swappiness is not protected by any locking so it can be modified while it is used. This is not an actual problem because races are unlikely. But it is better to use [READ|WRITE]_ONCE to prevent compiler from doing anything funky. The access of memcg->swappiness and vm_swappiness is lockless, so both of them can be concurrently set at the same time as we are trying to read them. All occurrences of memcg->swappiness and vm_swappiness are updated with [READ|WRITE]_ONCE. [findns94@gmail.com: v3] Link: https://lkml.kernel.org/r/20230308162555.14195-3-findns94@gmail.com Link: https://lkml.kernel.org/r/20230306154138.3775-3-findns94@gmail.com Signed-off-by: Yue Zhao Acked-by: Michal Hocko Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Tang Yizhou Signed-off-by: Andrew Morton --- include/linux/swap.h | 8 ++++---- mm/memcontrol.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index d5d0b54e90e8..bfc3b06b5f8f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -620,18 +620,18 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* Cgroup2 doesn't have per-cgroup swappiness */ if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) - return vm_swappiness; + return READ_ONCE(vm_swappiness); /* root ? */ if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg)) - return vm_swappiness; + return READ_ONCE(vm_swappiness); - return memcg->swappiness; + return READ_ONCE(memcg->swappiness); } #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { - return vm_swappiness; + return READ_ONCE(vm_swappiness); } #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 06821e5f7604..1b0112afcad3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4179,9 +4179,9 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, return -EINVAL; if (!mem_cgroup_is_root(memcg)) - memcg->swappiness = val; + WRITE_ONCE(memcg->swappiness, val); else - vm_swappiness = val; + WRITE_ONCE(vm_swappiness, val); return 0; } @@ -5353,7 +5353,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) #endif page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); if (parent) { - memcg->swappiness = mem_cgroup_swappiness(parent); + WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent)); memcg->oom_kill_disable = parent->oom_kill_disable; page_counter_init(&memcg->memory, &parent->memory); -- cgit v1.2.3-70-g09d2 From c7b23b68e2aa93f86a206222d23ccd9a21f5982a Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 13 Apr 2023 10:40:34 +0000 Subject: mm: vmscan: refactor updating current->reclaim_state During reclaim, we keep track of pages reclaimed from other means than LRU-based reclaim through scan_control->reclaim_state->reclaimed_slab, which we stash a pointer to in current task_struct. However, we keep track of more than just reclaimed slab pages through this. We also use it for clean file pages dropped through pruned inodes, and xfs buffer pages freed. Rename reclaimed_slab to reclaimed, and add a helper function that wraps updating it through current, so that future changes to this logic are contained within include/linux/swap.h. Link: https://lkml.kernel.org/r/20230413104034.1086717-4-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Michal Hocko Cc: Alexander Viro Cc: Christoph Lameter Cc: Darrick J. Wong Cc: Dave Chinner Cc: David Hildenbrand Cc: David Rientjes Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Miaohe Lin Cc: NeilBrown Cc: Peter Xu Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tim Chen Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton --- fs/inode.c | 3 +-- fs/xfs/xfs_buf.c | 3 +-- include/linux/swap.h | 17 ++++++++++++++++- mm/slab.c | 3 +-- mm/slob.c | 6 ++---- mm/slub.c | 5 ++--- 6 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/linux/swap.h') diff --git a/fs/inode.c b/fs/inode.c index 4558dc2f1355..e60fcc41faf1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -864,8 +864,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); - if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += reap; + mm_account_reclaimed_pages(reap); } iput(inode); spin_lock(lru_lock); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 54c774af6e1c..15d1e5a7c2d3 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -286,8 +286,7 @@ xfs_buf_free_pages( if (bp->b_pages[i]) __free_page(bp->b_pages[i]); } - if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += bp->b_page_count; + mm_account_reclaimed_pages(bp->b_page_count); if (bp->b_pages != bp->b_page_array) kmem_free(bp->b_pages); diff --git a/include/linux/swap.h b/include/linux/swap.h index bfc3b06b5f8f..7f7d5b9ddf7e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -153,13 +153,28 @@ union swap_header { * memory reclaim */ struct reclaim_state { - unsigned long reclaimed_slab; + /* pages reclaimed outside of LRU-based reclaim */ + unsigned long reclaimed; #ifdef CONFIG_LRU_GEN /* per-thread mm walk data */ struct lru_gen_mm_walk *mm_walk; #endif }; +/* + * mm_account_reclaimed_pages(): account reclaimed pages outside of LRU-based + * reclaim + * @pages: number of pages reclaimed + * + * If the current process is undergoing a reclaim operation, increment the + * number of reclaimed pages by @pages. + */ +static inline void mm_account_reclaimed_pages(unsigned long pages) +{ + if (current->reclaim_state) + current->reclaim_state->reclaimed += pages; +} + #ifdef __KERNEL__ struct address_space; diff --git a/mm/slab.c b/mm/slab.c index 6b7c172158e5..bb57f7fdbae1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1392,8 +1392,7 @@ static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab) smp_wmb(); __folio_clear_slab(folio); - if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += 1 << order; + mm_account_reclaimed_pages(1 << order); unaccount_slab(slab, order, cachep); __free_pages(&folio->page, order); } diff --git a/mm/slob.c b/mm/slob.c index fe567fcfa3a3..79cc8680c973 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -61,7 +61,7 @@ #include #include -#include /* struct reclaim_state */ +#include /* mm_account_reclaimed_pages() */ #include #include #include @@ -211,9 +211,7 @@ static void slob_free_pages(void *b, int order) { struct page *sp = virt_to_page(b); - if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += 1 << order; - + mm_account_reclaimed_pages(1 << order); mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); __free_pages(sp, order); diff --git a/mm/slub.c b/mm/slub.c index f49d669ff604..2728d5ae4dc0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -11,7 +11,7 @@ */ #include -#include /* struct reclaim_state */ +#include /* mm_account_reclaimed_pages() */ #include #include #include @@ -2063,8 +2063,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab) /* Make the mapping reset visible before clearing the flag */ smp_wmb(); __folio_clear_slab(folio); - if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += pages; + mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); __free_pages(&folio->page, order); } -- cgit v1.2.3-70-g09d2 From 4bf4f155bfbc77a12ad2c9e12d9f57718adb9a5c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 17 Apr 2023 19:48:07 +0800 Subject: mm: correct arg in reclaim_pages()/reclaim_clean_pages_from_list() Both of them change the arg from page_list to folio_list when convert them to use a folio, but not the declaration, let's correct it, also move the reclaim_pages() from swap.h to internal.h as it only used in mm. Link: https://lkml.kernel.org/r/20230417114807.186786-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviwed-by: Matthew Wilcox (Oracle) Reviewed-by: SeongJae Park Reviewed-by: David Hildenbrand Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 - mm/internal.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/swap.h') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7f7d5b9ddf7e..3c69cb653cb9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -442,7 +442,6 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); -extern unsigned long reclaim_pages(struct list_head *page_list); #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; diff --git a/mm/internal.h b/mm/internal.h index 6483db57a31f..68410c6d97ac 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -783,8 +783,9 @@ extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long); extern void set_pageblock_order(void); +unsigned long reclaim_pages(struct list_head *folio_list); unsigned int reclaim_clean_pages_from_list(struct zone *zone, - struct list_head *page_list); + struct list_head *folio_list); /* The ALLOC_WMARK bits are used as an index to zone->watermark */ #define ALLOC_WMARK_MIN WMARK_MIN #define ALLOC_WMARK_LOW WMARK_LOW -- cgit v1.2.3-70-g09d2