Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	138
1 file changed, 65 insertions(+), 73 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e8ca4bdcb03c..b29b850cf399 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -197,7 +197,7 @@ static struct move_charge_struct {
 };
 
 /*
- * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
+ * Maximum loops in mem_cgroup_soft_reclaim(), used for soft
  * limit reclaim to prevent infinite loops, if they ever occur.
  */
 #define	MEM_CGROUP_MAX_RECLAIM_LOOPS		100
@@ -742,6 +742,10 @@ struct memcg_vmstats {
 	long			state[MEMCG_NR_STAT];
 	unsigned long		events[NR_MEMCG_EVENTS];
 
+	/* Non-hierarchical (CPU aggregated) page state & events */
+	long			state_local[MEMCG_NR_STAT];
+	unsigned long		events_local[NR_MEMCG_EVENTS];
+
 	/* Pending child counts during tree propagation */
 	long			state_pending[MEMCG_NR_STAT];
 	unsigned long		events_pending[NR_MEMCG_EVENTS];
@@ -775,11 +779,8 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
 static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
-	long x = 0;
-	int cpu;
+	long x = READ_ONCE(memcg->vmstats->state_local[idx]);
 
-	for_each_possible_cpu(cpu)
-		x += per_cpu(memcg->vmstats_percpu->state[idx], cpu);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -926,16 +927,12 @@ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 
 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
-	long x = 0;
-	int cpu;
 	int index = memcg_events_index(event);
 
 	if (index < 0)
 		return 0;
 
-	for_each_possible_cpu(cpu)
-		x += per_cpu(memcg->vmstats_percpu->events[index], cpu);
-	return x;
+	return READ_ONCE(memcg->vmstats->events_local[index]);
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -1629,7 +1626,6 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 	WARN_ON_ONCE(seq_buf_has_overflowed(s));
 }
 
-#define K(x) ((x) << (PAGE_SHIFT-10))
 /**
  * mem_cgroup_print_oom_context: Print OOM information relevant to
  * memory controller.
@@ -3036,21 +3032,21 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
 	return objcg;
 }
 
-struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
+struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 {
 	struct obj_cgroup *objcg;
 
 	if (!memcg_kmem_online())
 		return NULL;
 
-	if (PageMemcgKmem(page)) {
-		objcg = __folio_objcg(page_folio(page));
+	if (folio_memcg_kmem(folio)) {
+		objcg = __folio_objcg(folio);
 		obj_cgroup_get(objcg);
 	} else {
 		struct mem_cgroup *memcg;
 
 		rcu_read_lock();
-		memcg = __folio_memcg(page_folio(page));
+		memcg = __folio_memcg(folio);
 		if (memcg)
 			objcg = __get_obj_cgroup_from_memcg(memcg);
 		else
@@ -3871,10 +3867,6 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
 		case _MEMSWAP:
 			ret = mem_cgroup_resize_max(memcg, nr_pages, true);
 			break;
-		case _KMEM:
-			/* kmem.limit_in_bytes is deprecated. */
-			ret = -EOPNOTSUPP;
-			break;
 		case _TCP:
 			ret = memcg_update_tcp_max(memcg, nr_pages);
 			break;
@@ -5086,12 +5078,6 @@ static struct cftype mem_cgroup_legacy_files[] = {
 	},
 #endif
 	{
-		.name = "kmem.limit_in_bytes",
-		.private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
-		.write = mem_cgroup_write,
-		.read_u64 = mem_cgroup_read_u64,
-	},
-	{
 		.name = "kmem.usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
 		.read_u64 = mem_cgroup_read_u64,
@@ -5165,6 +5151,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
  * those references are manageable from userspace.
  */
 
+#define MEM_CGROUP_ID_MAX	((1UL << MEM_CGROUP_ID_SHIFT) - 1)
 static DEFINE_IDR(mem_cgroup_idr);
 
 static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
@@ -5526,7 +5513,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
 	struct memcg_vmstats_percpu *statc;
-	long delta, v;
+	long delta, delta_cpu, v;
 	int i, nid;
 
 	statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
@@ -5542,19 +5529,23 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			memcg->vmstats->state_pending[i] = 0;
 
 		/* Add CPU changes on this level since the last flush */
+		delta_cpu = 0;
 		v = READ_ONCE(statc->state[i]);
 		if (v != statc->state_prev[i]) {
-			delta += v - statc->state_prev[i];
+			delta_cpu = v - statc->state_prev[i];
+			delta += delta_cpu;
 			statc->state_prev[i] = v;
 		}
 
-		if (!delta)
-			continue;
-
 		/* Aggregate counts on this level and propagate upwards */
-		memcg->vmstats->state[i] += delta;
-		if (parent)
-			parent->vmstats->state_pending[i] += delta;
+		if (delta_cpu)
+			memcg->vmstats->state_local[i] += delta_cpu;
+
+		if (delta) {
+			memcg->vmstats->state[i] += delta;
+			if (parent)
+				parent->vmstats->state_pending[i] += delta;
+		}
 	}
 
 	for (i = 0; i < NR_MEMCG_EVENTS; i++) {
@@ -5562,18 +5553,22 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 		if (delta)
 			memcg->vmstats->events_pending[i] = 0;
 
+		delta_cpu = 0;
 		v = READ_ONCE(statc->events[i]);
 		if (v != statc->events_prev[i]) {
-			delta += v - statc->events_prev[i];
+			delta_cpu = v - statc->events_prev[i];
+			delta += delta_cpu;
 			statc->events_prev[i] = v;
 		}
 
-		if (!delta)
-			continue;
+		if (delta_cpu)
+			memcg->vmstats->events_local[i] += delta_cpu;
 
-		memcg->vmstats->events[i] += delta;
-		if (parent)
-			parent->vmstats->events_pending[i] += delta;
+		if (delta) {
+			memcg->vmstats->events[i] += delta;
+			if (parent)
+				parent->vmstats->events_pending[i] += delta;
+		}
 	}
 
 	for_each_node_state(nid, N_MEMORY) {
@@ -5591,18 +5586,22 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			if (delta)
 				pn->lruvec_stats.state_pending[i] = 0;
 
+			delta_cpu = 0;
 			v = READ_ONCE(lstatc->state[i]);
 			if (v != lstatc->state_prev[i]) {
-				delta += v - lstatc->state_prev[i];
+				delta_cpu = v - lstatc->state_prev[i];
+				delta += delta_cpu;
 				lstatc->state_prev[i] = v;
 			}
 
-			if (!delta)
-				continue;
+			if (delta_cpu)
+				pn->lruvec_stats.state_local[i] += delta_cpu;
 
-			pn->lruvec_stats.state[i] += delta;
-			if (ppn)
-				ppn->lruvec_stats.state_pending[i] += delta;
+			if (delta) {
+				pn->lruvec_stats.state[i] += delta;
+				if (ppn)
+					ppn->lruvec_stats.state_pending[i] += delta;
+			}
 		}
 	}
 }
@@ -5648,7 +5647,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
 {
 	struct page *page = vm_normal_page(vma, addr, ptent);
 
-	if (!page || !page_mapped(page))
+	if (!page)
 		return NULL;
 	if (PageAnon(page)) {
 		if (!(mc.flags & MOVE_ANON))
@@ -5657,8 +5656,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
 		if (!(mc.flags & MOVE_FILE))
 			return NULL;
 	}
-	if (!get_page_unless_zero(page))
-		return NULL;
+	get_page(page);
 
 	return page;
 }
@@ -5766,7 +5764,7 @@ static int mem_cgroup_move_account(struct page *page,
 		if (folio_mapped(folio)) {
 			__mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages);
 			__mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages);
-			if (folio_test_transhuge(folio)) {
+			if (folio_test_pmd_mappable(folio)) {
 				__mod_lruvec_state(from_vec, NR_ANON_THPS,
 						   -nr_pages);
 				__mod_lruvec_state(to_vec, NR_ANON_THPS,
@@ -5852,25 +5850,20 @@ out:
  * @ptent: the pte to be checked
  * @target: the pointer the target page or swap ent will be stored(can be NULL)
  *
- * Returns
- *   0(MC_TARGET_NONE): if the pte is not a target for move charge.
- *   1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
- *     move charge. if @target is not NULL, the page is stored in target->page
- *     with extra refcnt got(Callers should handle it).
- *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
- *     target for charge migration. if @target is not NULL, the entry is stored
- *     in target->ent.
- *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is device memory and
- *   thus not on the lru.
- *     For now we such page is charge like a regular page would be as for all
- *     intent and purposes it is just special memory taking the place of a
- *     regular page.
- *
- *     See Documentations/vm/hmm.txt and include/linux/hmm.h
- *
- * Called with pte lock held.
+ * Context: Called with pte lock held.
+ * Return:
+ * * MC_TARGET_NONE - If the pte is not a target for move charge.
+ * * MC_TARGET_PAGE - If the page corresponding to this pte is a target for
+ *   move charge. If @target is not NULL, the page is stored in target->page
+ *   with extra refcnt taken (Caller should release it).
+ * * MC_TARGET_SWAP - If the swap entry corresponding to this pte is a
+ *   target for charge migration.  If @target is not NULL, the entry is
+ *   stored in target->ent.
+ * * MC_TARGET_DEVICE - Like MC_TARGET_PAGE but page is device memory and
+ *   thus not on the lru.  For now such page is charged like a regular page
+ *   would be as it is just special memory taking the place of a regular page.
+ *   See Documentations/vm/hmm.txt and include/linux/hmm.h
  */
-
 static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 		unsigned long addr, pte_t ptent, union mc_target *target)
 {
@@ -6024,6 +6017,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops precharge_walk_ops = {
 	.pmd_entry	= mem_cgroup_count_precharge_pte_range,
+	.walk_lock	= PGWALK_RDLOCK,
 };
 
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6297,7 @@ put:			/* get_mctgt_type() gets & locks the page */
 
 static const struct mm_walk_ops charge_walk_ops = {
 	.pmd_entry	= mem_cgroup_move_charge_pte_range,
+	.walk_lock	= PGWALK_RDLOCK,
 };
 
 static void mem_cgroup_move_charge(void)
@@ -6696,8 +6691,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			lru_add_drain_all();
 
 		reclaimed = try_to_free_mem_cgroup_pages(memcg,
-						nr_to_reclaim - nr_reclaimed,
-						GFP_KERNEL, reclaim_options);
+					min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
+					GFP_KERNEL, reclaim_options);
 
 		if (!reclaimed && !nr_retries--)
 			return -EAGAIN;
@@ -7535,9 +7530,6 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
 	struct mem_cgroup *memcg;
 	unsigned short id;
 
-	if (mem_cgroup_disabled())
-		return;
-
 	id = swap_cgroup_record(entry, 0, nr_pages);
 	rcu_read_lock();
 	memcg = mem_cgroup_from_id(id);
@@ -7787,7 +7779,7 @@ bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
  * @objcg: the object cgroup
  * @size: size of compressed object
  *
- * This forces the charge after obj_cgroup_may_swap() allowed
+ * This forces the charge after obj_cgroup_may_zswap() allowed
  * compression and storage in zwap for this cgroup to go ahead.
  */
 void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
