From 80127a39681bd68c959f0953f84a830cbd7c3b1c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 14 Jul 2016 20:08:46 +0200
Subject: locking/percpu-rwsem: Optimize readers and reduce global impact

Currently the percpu-rwsem switches to (global) atomic ops while a
writer is waiting; which could be quite a while and slows down
releasing the readers.

This patch cures this problem by ordering the reader-state vs
reader-count (see the comments in __percpu_down_read() and
percpu_down_write()). This changes a global atomic op into a full
memory barrier, which doesn't have the global cacheline contention.

This also enables using the percpu-rwsem with rcu_sync disabled in
order to bias the implementation differently, reducing the writer
latency by adding some cost to readers.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Oleg Nesterov
Cc: Andrew Morton
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Paul McKenney
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
[ Fixed modular build. ]
Signed-off-by: Ingo Molnar
---
 kernel/rcu/sync.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/rcu')

diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index be922c9f3d37..198473d90f81 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -68,6 +68,8 @@ void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
 	RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
 			 "suspicious rcu_sync_is_idle() usage");
 }
+
+EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
 #endif
 
 /**
--
cgit v1.2.3-70-g09d2
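The ordering the commit message above describes (publish the reader count,
full barrier, then check the writer state, paired with the mirror-image
sequence on the writer side) can be sketched in userspace. The following is
an illustrative model only, not the kernel's percpu-rwsem code: the names
read_count, writer_waiting, try_read_lock() and readers_active() are invented
here, per-CPU counters are approximated by a per-thread array, and
atomic_thread_fence() stands in for smp_mb().

/*
 * Toy model of the reader-count vs reader-state ordering. Illustrative
 * only; not the kernel implementation.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define NR_THREADS 4

_Atomic unsigned int read_count[NR_THREADS];	/* stands in for per-CPU counts */
atomic_bool writer_waiting;			/* reader-state, set by the writer */

/* Reader fast path: bump the local count, full fence, then check the writer. */
bool try_read_lock(int tid)
{
	atomic_fetch_add_explicit(&read_count[tid], 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* "smp_mb()", pairs with the writer's fence */
	if (!atomic_load_explicit(&writer_waiting, memory_order_relaxed))
		return true;				/* no global atomic on this path */
	/* Writer pending: undo and let the caller fall back to a slow path (elided). */
	atomic_fetch_sub_explicit(&read_count[tid], 1, memory_order_relaxed);
	return false;
}

/* Writer: publish the state, full fence, then sum the per-thread counts. */
bool readers_active(void)
{
	atomic_store_explicit(&writer_waiting, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* pairs with the reader's fence */

	unsigned int sum = 0;
	for (int i = 0; i < NR_THREADS; i++)
		sum += atomic_load_explicit(&read_count[i], memory_order_relaxed);
	return sum != 0;
}

Because both sides put a full fence between their store and their load, at
least one side must observe the other: a reader cannot take the fast path
while the writer simultaneously misses that reader's count increment.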
From 3942a9bd7b5842a924e99ee6ec1350b8006c94ec Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Aug 2016 18:54:13 +0200
Subject: locking, rcu, cgroup: Avoid synchronize_sched() in __cgroup_procs_write()

The current percpu-rwsem read side is entirely free of serializing insns
at the cost of having a synchronize_sched() in the write path. The
latency of the synchronize_sched() is too high for cgroups.

The commit 1ed1328792ff talks about the write path being a fairly cold
path but this is not the case for Android which moves task to the
foreground cgroup and back around binder IPC calls from foreground
processes to background processes, so it is significantly hotter than
human initiated operations.

Switch cgroup_threadgroup_rwsem into the slow mode for now to avoid the
problem, hopefully it should not be that slow after another commit:
80127a39681b ("locking/percpu-rwsem: Optimize readers and reduce global
impact").

We could just add rcu_sync_enter() into cgroup_init() but we do not want
another synchronize_sched() at boot time, so this patch adds the new
helper which doesn't block but currently can only be called before the
first use.

Reported-by: John Stultz
Reported-by: Dmitry Shmidt
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Oleg Nesterov
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andrew Morton
Cc: Colin Cross
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Rom Lemarchand
Cc: Tejun Heo
Cc: Thomas Gleixner
Cc: Todd Kjos
Link: http://lkml.kernel.org/r/20160811165413.GA22807@redhat.com
Signed-off-by: Ingo Molnar
---
 include/linux/rcu_sync.h |  1 +
 kernel/cgroup.c          |  6 ++++++
 kernel/rcu/sync.c        | 12 ++++++++++++
 3 files changed, 19 insertions(+)

(limited to 'kernel/rcu')

diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index a63a33e6196e..ece7ed9a4a70 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -59,6 +59,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
 }
 
 extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
+extern void rcu_sync_enter_start(struct rcu_sync *);
 extern void rcu_sync_enter(struct rcu_sync *);
 extern void rcu_sync_exit(struct rcu_sync *);
 extern void rcu_sync_dtor(struct rcu_sync *);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..9f51cdf58f5a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5606,6 +5606,12 @@ int __init cgroup_init(void)
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
 
+	/*
+	 * The latency of the synchronize_sched() is too high for cgroups,
+	 * avoid it at the cost of forcing all readers into the slow path.
+	 */
+	rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
+
 	get_user_ns(init_cgroup_ns.user_ns);
 
 	mutex_lock(&cgroup_mutex);
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 198473d90f81..50d1861f7759 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -84,6 +84,18 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
 	rsp->gp_type = type;
 }
 
+/**
+ * Must be called after rcu_sync_init() and before first use.
+ *
+ * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
+ * pairs turn into NO-OPs.
+ */
+void rcu_sync_enter_start(struct rcu_sync *rsp)
+{
+	rsp->gp_count++;
+	rsp->gp_state = GP_PASSED;
+}
+
 /**
  * rcu_sync_enter() - Force readers onto slowpath
  * @rsp: Pointer to rcu_sync structure to use for synchronization
--
cgit v1.2.3-70-g09d2
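As a toy model of why rcu_sync_enter_start() above can skip the grace period:
it merely flips the gate before any reader has run, so no ordering against
concurrent readers is required, which is also why it may only be called
before first use. The sketch below is an illustrative userspace model, not
the kernel implementation; the toy_* struct and helpers are simplified
stand-ins invented here for the real rcu_sync machinery.

/* Toy model of the "start in slow mode" idea; illustrative only. */
#include <assert.h>
#include <stdbool.h>

enum { GP_IDLE = 0, GP_PASSED = 1 };

struct toy_rcu_sync {
	int gp_state;
	int gp_count;
};

void toy_rcu_sync_init(struct toy_rcu_sync *rsp)
{
	rsp->gp_state = GP_IDLE;
	rsp->gp_count = 0;
}

/*
 * Like rcu_sync_enter(), but without waiting for a grace period. Only
 * safe before any reader has run, since nothing here orders the update
 * against concurrent toy_rcu_sync_is_idle() calls.
 */
void toy_rcu_sync_enter_start(struct toy_rcu_sync *rsp)
{
	rsp->gp_count++;
	rsp->gp_state = GP_PASSED;
}

/* Readers use this to pick the fast path; false means "take the slow path". */
bool toy_rcu_sync_is_idle(struct toy_rcu_sync *rsp)
{
	return rsp->gp_state == GP_IDLE;
}

int main(void)
{
	struct toy_rcu_sync rss;

	toy_rcu_sync_init(&rss);
	toy_rcu_sync_enter_start(&rss);	/* boot-time, before first use */

	/* Every later reader sees the gate down and takes the slow path. */
	assert(!toy_rcu_sync_is_idle(&rss));
	return 0;
}

The cgroup_init() hunk above is the intended caller of the real helper: it
performs this step once at boot, before cgroup_threadgroup_rwsem can have
any readers.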