diff options
author | Shakeel Butt <shakeelb@google.com> | 2020-08-06 23:21:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-07 11:33:25 -0700 |
commit | 991e7673859ed41e7ba83c8c4e57afe8cfebe314 (patch) | |
tree | aa52b23335d5c32f8ece153f5e9b164729a6fbd4 | |
parent | fbc1ac9d09d70859eee24131d667e01e3986e368 (diff) |
mm: memcontrol: account kernel stack per node
Currently the kernel stack is being accounted per-zone. There is no need
to do that. In addition due to being per-zone, memcg has to keep a
separate MEMCG_KERNEL_STACK_KB. Make the stat per-node and deprecate
MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of
node_stat_item. In addition localize the kernel stack stats updates to
account_kernel_stack().
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Link: http://lkml.kernel.org/r/20200630161539.1759185-1-shakeelb@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/base/node.c | 4 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 4 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 21 | ||||
-rw-r--r-- | include/linux/mmzone.h | 8 | ||||
-rw-r--r-- | kernel/fork.c | 51 | ||||
-rw-r--r-- | kernel/scs.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 16 | ||||
-rw-r--r-- | mm/vmstat.c | 8 |
9 files changed, 55 insertions, 61 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c index 0cf13e31603c..508b80f6329b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), nid, K(i.sharedram), - nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), + nid, node_page_state(pgdat, NR_KERNEL_STACK_KB), #ifdef CONFIG_SHADOW_CALL_STACK - nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB), + nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), nid, 0UL, diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 38ea95fd919a..2a4c58f70fb9 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "SReclaimable: ", sreclaimable); show_val_kb(m, "SUnreclaim: ", sunreclaim); seq_printf(m, "KernelStack: %8lu kB\n", - global_zone_page_state(NR_KERNEL_STACK_KB)); + global_node_page_state(NR_KERNEL_STACK_KB)); #ifdef CONFIG_SHADOW_CALL_STACK seq_printf(m, "ShadowCallStack:%8lu kB\n", - global_zone_page_state(NR_KERNEL_SCS_KB)); + global_node_page_state(NR_KERNEL_SCS_KB)); #endif show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5a8b62d075e6..624400c27eba 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,8 +32,6 @@ struct kmem_cache; enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, - /* XXX: why are these zone and not node counters? */ - MEMCG_KERNEL_STACK_KB, MEMCG_NR_STAT, }; @@ -729,8 +727,19 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); + void mod_memcg_obj_state(void *p, int idx, int val); +static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, + int val) +{ + unsigned long flags; + + local_irq_save(flags); + __mod_lruvec_slab_state(p, idx, val); + local_irq_restore(flags); +} + static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { @@ -1151,6 +1160,14 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, __mod_node_page_state(page_pgdat(page), idx, val); } +static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, + int val) +{ + struct page *page = virt_to_head_page(p); + + mod_node_page_state(page_pgdat(page), idx, val); +} + static inline void mod_memcg_obj_state(void *p, int idx, int val) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b79100edd228..a3bd54139a30 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -155,10 +155,6 @@ enum zone_stat_item { NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_PAGETABLE, /* used for pagetables */ - NR_KERNEL_STACK_KB, /* measured in KiB */ -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) - NR_KERNEL_SCS_KB, /* measured in KiB */ -#endif /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) @@ -203,6 +199,10 @@ enum node_stat_item { NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ + NR_KERNEL_STACK_KB, /* measured in KiB */ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + NR_KERNEL_SCS_KB, /* measured in KiB */ +#endif NR_VM_NODE_STAT_ITEMS }; diff --git a/kernel/fork.c b/kernel/fork.c index 76d3f3387554..c7b4ce9d2647 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -276,13 +276,8 @@ static inline void free_thread_stack(struct task_struct *tsk) if (vm) { int i; - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { - mod_memcg_page_state(vm->pages[i], - MEMCG_KERNEL_STACK_KB, - -(int)(PAGE_SIZE / 1024)); - + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) memcg_kmem_uncharge_page(vm->pages[i], 0); - } for (i = 0; i < NR_CACHED_STACKS; i++) { if (this_cpu_cmpxchg(cached_stacks[i], @@ -382,31 +377,14 @@ static void account_kernel_stack(struct task_struct *tsk, int account) void *stack = task_stack_page(tsk); struct vm_struct *vm = task_stack_vm_area(tsk); - BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); - - if (vm) { - int i; - - BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { - mod_zone_page_state(page_zone(vm->pages[i]), - NR_KERNEL_STACK_KB, - PAGE_SIZE / 1024 * account); - } - } else { - /* - * All stack pages are in the same zone and belong to the - * same memcg. - */ - struct page *first_page = virt_to_page(stack); - - mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, - THREAD_SIZE / 1024 * account); - - mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); - } + /* All stack pages are in the same node. */ + if (vm) + mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); + else + mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); } static int memcg_charge_kernel_stack(struct task_struct *tsk) @@ -415,24 +393,23 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk) struct vm_struct *vm = task_stack_vm_area(tsk); int ret; + BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); + if (vm) { int i; + BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { /* * If memcg_kmem_charge_page() fails, page->mem_cgroup - * pointer is NULL, and both memcg_kmem_uncharge_page() - * and mod_memcg_page_state() in free_thread_stack() - * will ignore this page. So it's safe. + * pointer is NULL, and memcg_kmem_uncharge_page() in + * free_thread_stack() will ignore this page. */ ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); if (ret) return ret; - - mod_memcg_page_state(vm->pages[i], - MEMCG_KERNEL_STACK_KB, - PAGE_SIZE / 1024); } } #endif diff --git a/kernel/scs.c b/kernel/scs.c index 5d4d9bbdec36..4ff4a7ba0094 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -17,7 +17,7 @@ static void __scs_account(void *s, int account) { struct page *scs_page = virt_to_page(s); - mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB, + mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB, account * (SCS_SIZE / SZ_1K)); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 473f9b91d51f..a3e963366769 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1485,7 +1485,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg) (u64)memcg_page_state(memcg, NR_FILE_PAGES) * PAGE_SIZE); seq_buf_printf(&s, "kernel_stack %llu\n", - (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * + (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * 1024); seq_buf_printf(&s, "slab %llu\n", (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f9ad093814d2..8d5d8526c2f3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5396,6 +5396,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " anon_thp: %lukB" #endif " writeback_tmp:%lukB" + " kernel_stack:%lukB" +#ifdef CONFIG_SHADOW_CALL_STACK + " shadow_call_stack:%lukB" +#endif " all_unreclaimable? %s" "\n", pgdat->node_id, @@ -5417,6 +5421,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), + node_page_state(pgdat, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + node_page_state(pgdat, NR_KERNEL_SCS_KB), +#endif pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? "yes" : "no"); } @@ -5448,10 +5456,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " present:%lukB" " managed:%lukB" " mlocked:%lukB" - " kernel_stack:%lukB" -#ifdef CONFIG_SHADOW_CALL_STACK - " shadow_call_stack:%lukB" -#endif " pagetables:%lukB" " bounce:%lukB" " free_pcp:%lukB" @@ -5473,10 +5477,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(zone->present_pages), K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), - zone_page_state(zone, NR_KERNEL_STACK_KB), -#ifdef CONFIG_SHADOW_CALL_STACK - zone_page_state(zone, NR_KERNEL_SCS_KB), -#endif K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), diff --git a/mm/vmstat.c b/mm/vmstat.c index b171a76bfe83..2b866cbab11d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1140,10 +1140,6 @@ const char * const vmstat_text[] = { "nr_zone_write_pending", "nr_mlock", "nr_page_table_pages", - "nr_kernel_stack", -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) - "nr_shadow_call_stack", -#endif "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", @@ -1194,6 +1190,10 @@ const char * const vmstat_text[] = { "nr_kernel_misc_reclaimable", "nr_foll_pin_acquired", "nr_foll_pin_released", + "nr_kernel_stack", +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + "nr_shadow_call_stack", +#endif /* enum writeback_stat_item counters */ "nr_dirty_threshold", |