From eda09706b240ca9129ac4e1fbb4eb1e2bc67aadc Mon Sep 17 00:00:00 2001
From: Michal Koutný <mkoutny@suse.com>
Date: Wed, 3 Nov 2021 17:58:45 +0100
Subject: cgroup: rstat: Mark benign data race to silence KCSAN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a race between updaters and flushers (a flush can possibly miss
the latest update(s)). This is expected, as explained in the
cgroup_rstat_updated() comment. Also add a machine-readable annotation
so that KCSAN results aren't noisy.

Reported-by: Hao Sun <sunhao.th@gmail.com>
Link: https://lore.kernel.org/r/CACkBjsbPVdkub=e-E-p1WBOLxS515ith-53SFdmFHWV_QMo40w@mail.gmail.com
Suggested-by: Hao Sun <sunhao.th@gmail.com>

Signed-off-by: Michal Koutný <mkoutny@suse.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/rstat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/cgroup/rstat.c')

diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 1486768f2318..1abe74114527 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -35,7 +35,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
 	 * instead of NULL, we can tell whether @cgrp is on the list by
 	 * testing the next pointer for NULL.
 	 */
-	if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
+	if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
 		return;
 
 	raw_spin_lock_irqsave(cpu_lock, flags);
-- 
cgit v1.2.3-70-g09d2


From 0da41f7348fff193d01d031ce255088fa98324b7 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Sat, 25 Dec 2021 00:09:31 +0000
Subject: cgroup: rstat: explicitly put loop variant in while
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of looping unconditionally with while (true) and breaking out
from inside the body, put the loop condition in the while statement
itself.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/rstat.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel/cgroup/rstat.c')

diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 1abe74114527..bc6993258271 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -124,12 +124,10 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
 
 			prstatc = cgroup_rstat_cpu(parent, cpu);
 			nextp = &prstatc->updated_children;
-			while (true) {
+			while (*nextp != pos) {
 				struct cgroup_rstat_cpu *nrstatc;
 
 				nrstatc = cgroup_rstat_cpu(*nextp, cpu);
-				if (*nextp == pos)
-					break;
 				WARN_ON_ONCE(*nextp == parent);
 				nextp = &nrstatc->updated_next;
 			}
-- 
cgit v1.2.3-70-g09d2


From f5f60d235e7058da13a643c33fc7599c05ec0b73 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Sat, 25 Dec 2021 00:09:32 +0000
Subject: cgroup/rstat: check updated_next only for root
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit dc26532aed0a ("cgroup: rstat: punt root-level optimization to
individual controllers"), each rstat on an updated_children list has a
non-NULL ->updated_next.

This means we can remove the check on ->updated_next, provided we make
sure the subtree rooted at @root is on the list, which can be done by
checking ->updated_next for @root.

tj: Coding style fixes.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/rstat.c | 47 +++++++++++++++++++++++------------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

(limited to 'kernel/cgroup/rstat.c')

diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index bc6993258271..9d331ba44870 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -88,6 +88,7 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
 						   struct cgroup *root, int cpu)
 {
 	struct cgroup_rstat_cpu *rstatc;
+	struct cgroup *parent;
 
 	if (pos == root)
 		return NULL;
@@ -96,10 +97,14 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
 	 * We're gonna walk down to the first leaf and visit/remove it.  We
 	 * can pick whatever unvisited node as the starting point.
 	 */
-	if (!pos)
+	if (!pos) {
 		pos = root;
-	else
+		/* return NULL if this subtree is not on-list */
+		if (!cgroup_rstat_cpu(pos, cpu)->updated_next)
+			return NULL;
+	} else {
 		pos = cgroup_parent(pos);
+	}
 
 	/* walk down to the first leaf */
 	while (true) {
@@ -115,31 +120,25 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
 	 * However, due to the way we traverse, @pos will be the first
 	 * child in most cases. The only exception is @root.
 	 */
-	if (rstatc->updated_next) {
-		struct cgroup *parent = cgroup_parent(pos);
-
-		if (parent) {
-			struct cgroup_rstat_cpu *prstatc;
-			struct cgroup **nextp;
-
-			prstatc = cgroup_rstat_cpu(parent, cpu);
-			nextp = &prstatc->updated_children;
-			while (*nextp != pos) {
-				struct cgroup_rstat_cpu *nrstatc;
-
-				nrstatc = cgroup_rstat_cpu(*nextp, cpu);
-				WARN_ON_ONCE(*nextp == parent);
-				nextp = &nrstatc->updated_next;
-			}
-			*nextp = rstatc->updated_next;
-		}
+	parent = cgroup_parent(pos);
+	if (parent) {
+		struct cgroup_rstat_cpu *prstatc;
+		struct cgroup **nextp;
 
-		rstatc->updated_next = NULL;
-		return pos;
+		prstatc = cgroup_rstat_cpu(parent, cpu);
+		nextp = &prstatc->updated_children;
+		while (*nextp != pos) {
+			struct cgroup_rstat_cpu *nrstatc;
+
+			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
+			WARN_ON_ONCE(*nextp == parent);
+			nextp = &nrstatc->updated_next;
+		}
+		*nextp = rstatc->updated_next;
 	}
 
-	/* only happens for @root */
-	return NULL;
+	rstatc->updated_next = NULL;
+	return pos;
 }
 
 /* see cgroup_rstat_flush() */
-- 
cgit v1.2.3-70-g09d2