summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@kernel.org>2024-07-01 19:30:56 -0700
committerNeeraj Upadhyay <neeraj.upadhyay@kernel.org>2024-08-15 00:10:50 +0530
commit1dd01c06506c42ab3a8026272ee93c466ae0cb4b (patch)
tree26d468a14e07c102b449447a0955f089dfddf408
parent9fbaa44114ca6b69074a488295e86a9fa7e685f9 (diff)
rcu: Summarize RCU CPU stall warnings during CSD-lock stalls
During CSD-lock stalls, the additional information output by RCU CPU stall warnings is usually redundant, flooding the console for not good reason. However, this has been the way things work for a few years. This commit therefore adds an rcutree.csd_lock_suppress_rcu_stall kernel boot parameter that causes RCU CPU stall warnings to be abbreviated to a single line when there is at least one CPU that has been stuck waiting for CSD lock for more than five seconds. To make this abbreviated message happen with decent probability: tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 8 \ --configs "2*TREE01" --kconfig "CONFIG_CSD_LOCK_WAIT_DEBUG=y" \ --bootargs "csdlock_debug=1 rcutorture.stall_cpu=200 \ rcutorture.stall_cpu_holdoff=120 rcutorture.stall_cpu_irqsoff=1 \ rcutree.csd_lock_suppress_rcu_stall=1 \ rcupdate.rcu_exp_cpu_stall_timeout=5000" --trust-make [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--kernel/rcu/tree_stall.h8
2 files changed, 11 insertions, 1 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1384c7b59c9..d56356c13184 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4937,6 +4937,10 @@
Set maximum number of finished RCU callbacks to
process in one batch.
+ rcutree.csd_lock_suppress_rcu_stall= [KNL]
+ Do only a one-line RCU CPU stall warning when
+ there is an ongoing too-long CSD-lock wait.
+
rcutree.do_rcu_barrier= [KNL]
Request a call to rcu_barrier(). This is
throttled so that userspace tests can safely
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 4b0e9d7c4c68..b497d4c6dabd 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -9,6 +9,7 @@
#include <linux/kvm_para.h>
#include <linux/rcu_notifier.h>
+#include <linux/smp.h>
//////////////////////////////////////////////////////////////////////////////
//
@@ -719,6 +720,9 @@ static void print_cpu_stall(unsigned long gps)
set_preempt_need_resched();
}
+static bool csd_lock_suppress_rcu_stall;
+module_param(csd_lock_suppress_rcu_stall, bool, 0644);
+
static void check_cpu_stall(struct rcu_data *rdp)
{
bool self_detected;
@@ -791,7 +795,9 @@ static void check_cpu_stall(struct rcu_data *rdp)
return;
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
- if (self_detected) {
+ if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
+ pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
+ } else if (self_detected) {
/* We haven't checked in, so go dump stack. */
print_cpu_stall(gps);
} else {