diff options
Diffstat (limited to 'mm/memcontrol.c')
| -rw-r--r-- | mm/memcontrol.c | 109 | 
1 files changed, 71 insertions, 38 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9acfb165eb52..14cb1db4c52b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -903,14 +903,20 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,  		if (prev && reclaim->generation != iter->generation)  			goto out_unlock; -		do { +		while (1) {  			pos = READ_ONCE(iter->position); +			if (!pos || css_tryget(&pos->css)) +				break;  			/* -			 * A racing update may change the position and -			 * put the last reference, hence css_tryget(), -			 * or retry to see the updated position. +			 * css reference reached zero, so iter->position will +			 * be cleared by ->css_released. However, we should not +			 * rely on this happening soon, because ->css_released +			 * is called from a work queue, and by busy-waiting we +			 * might block it. So we clear iter->position right +			 * away.  			 */ -		} while (pos && !css_tryget(&pos->css)); +			(void)cmpxchg(&iter->position, pos, NULL); +		}  	}  	if (pos) @@ -956,17 +962,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,  	}  	if (reclaim) { -		if (cmpxchg(&iter->position, pos, memcg) == pos) { -			if (memcg) -				css_get(&memcg->css); -			if (pos) -				css_put(&pos->css); -		} -  		/* -		 * pairs with css_tryget when dereferencing iter->position -		 * above. +		 * The position could have already been updated by a competing +		 * thread, so check that the value hasn't changed since we read +		 * it to avoid reclaiming from the same cgroup twice.  		 */ +		(void)cmpxchg(&iter->position, pos, memcg); +  		if (pos)  			css_put(&pos->css); @@ -999,6 +1001,28 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,  		css_put(&prev->css);  } +static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) +{ +	struct mem_cgroup *memcg = dead_memcg; +	struct mem_cgroup_reclaim_iter *iter; +	struct mem_cgroup_per_zone *mz; +	int nid, zid; +	int i; + +	while ((memcg = parent_mem_cgroup(memcg))) { +		for_each_node(nid) { +			for (zid = 0; zid < MAX_NR_ZONES; zid++) { +				mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; +				for (i = 0; i <= DEF_PRIORITY; i++) { +					iter = &mz->iter[i]; +					cmpxchg(&iter->position, +						dead_memcg, NULL); +				} +			} +		} +	} +} +  /*   * Iteration constructs for visiting all cgroups (under a tree).  If   * loops are exited prematurely (break), mem_cgroup_iter_break() must @@ -2128,7 +2152,7 @@ done_restock:  	 */  	do {  		if (page_counter_read(&memcg->memory) > memcg->high) { -			current->memcg_nr_pages_over_high += nr_pages; +			current->memcg_nr_pages_over_high += batch;  			set_notify_resume(current);  			break;  		} @@ -4324,6 +4348,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)  	wb_memcg_offline(memcg);  } +static void mem_cgroup_css_released(struct cgroup_subsys_state *css) +{ +	struct mem_cgroup *memcg = mem_cgroup_from_css(css); + +	invalidate_reclaim_iterators(memcg); +} +  static void mem_cgroup_css_free(struct cgroup_subsys_state *css)  {  	struct mem_cgroup *memcg = mem_cgroup_from_css(css); @@ -4779,23 +4810,18 @@ static void mem_cgroup_clear_mc(void)  	spin_unlock(&mc.lock);  } -static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, -				 struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup_taskset *tset)  { -	struct mem_cgroup *memcg = mem_cgroup_from_css(css); +	struct cgroup_subsys_state *css; +	struct mem_cgroup *memcg = NULL; /* unneeded init to make gcc happy */  	struct mem_cgroup *from;  	struct task_struct *leader, *p;  	struct mm_struct *mm;  	unsigned long move_flags;  	int ret = 0; -	/* -	 * We are now commited to this value whatever it is. Changes in this -	 * tunable will only affect upcoming migrations, not the current one. -	 * So we need to save it, and keep it going. -	 */ -	move_flags = READ_ONCE(memcg->move_charge_at_immigrate); -	if (!move_flags) +	/* charge immigration isn't supported on the default hierarchy */ +	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))  		return 0;  	/* @@ -4805,13 +4831,23 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,  	 * multiple.  	 */  	p = NULL; -	cgroup_taskset_for_each_leader(leader, tset) { +	cgroup_taskset_for_each_leader(leader, css, tset) {  		WARN_ON_ONCE(p);  		p = leader; +		memcg = mem_cgroup_from_css(css);  	}  	if (!p)  		return 0; +	/* +	 * We are now commited to this value whatever it is. Changes in this +	 * tunable will only affect upcoming migrations, not the current one. +	 * So we need to save it, and keep it going. +	 */ +	move_flags = READ_ONCE(memcg->move_charge_at_immigrate); +	if (!move_flags) +		return 0; +  	from = mem_cgroup_from_task(p);  	VM_BUG_ON(from == memcg); @@ -4842,8 +4878,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,  	return ret;  } -static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css, -				     struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)  {  	if (mc.to)  		mem_cgroup_clear_mc(); @@ -4985,10 +5020,10 @@ retry:  	atomic_dec(&mc.from->moving_account);  } -static void mem_cgroup_move_task(struct cgroup_subsys_state *css, -				 struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup_taskset *tset)  { -	struct task_struct *p = cgroup_taskset_first(tset); +	struct cgroup_subsys_state *css; +	struct task_struct *p = cgroup_taskset_first(tset, &css);  	struct mm_struct *mm = get_task_mm(p);  	if (mm) { @@ -5000,17 +5035,14 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css,  		mem_cgroup_clear_mc();  }  #else	/* !CONFIG_MMU */ -static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, -				 struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup_taskset *tset)  {  	return 0;  } -static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css, -				     struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)  {  } -static void mem_cgroup_move_task(struct cgroup_subsys_state *css, -				 struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup_taskset *tset)  {  }  #endif @@ -5184,6 +5216,7 @@ struct cgroup_subsys memory_cgrp_subsys = {  	.css_alloc = mem_cgroup_css_alloc,  	.css_online = mem_cgroup_css_online,  	.css_offline = mem_cgroup_css_offline, +	.css_released = mem_cgroup_css_released,  	.css_free = mem_cgroup_css_free,  	.css_reset = mem_cgroup_css_reset,  	.can_attach = mem_cgroup_can_attach, @@ -5511,11 +5544,11 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)   * mem_cgroup_replace_page - migrate a charge to another page   * @oldpage: currently charged page   * @newpage: page to transfer the charge to - * @lrucare: either or both pages might be on the LRU already   *   * Migrate the charge from @oldpage to @newpage.   *   * Both pages must be locked, @newpage->mapping must be set up. + * Either or both pages might be on the LRU already.   */  void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)  {  | 
