From e2c73a6860bdf54f2c6bf8cddc34ddc91a1343e1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 27 Sep 2021 14:18:51 -0700
Subject: rcu: Remove the RCU_FAST_NO_HZ Kconfig option

All of the uses of CONFIG_RCU_FAST_NO_HZ=y that I have seen involve
systems with RCU callbacks offloaded.  In this situation, all that this
Kconfig option does is slow down idle entry/exit with an additional
allways-taken early exit.  If this is the only use case, then this
Kconfig option nothing but an attractive nuisance that needs to go away.

This commit therefore removes the RCU_FAST_NO_HZ Kconfig option.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ef8d36f580fc..5e98257c2910 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -624,7 +624,6 @@ static noinstr void rcu_eqs_enter(bool user)
 	instrumentation_begin();
 	trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
-	rcu_prepare_for_idle();
 	rcu_preempt_deferred_qs(current);
 
 	// instrumentation for the noinstr rcu_dynticks_eqs_enter()
@@ -768,9 +767,6 @@ noinstr void rcu_nmi_exit(void)
 	trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
 	WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
 
-	if (!in_nmi())
-		rcu_prepare_for_idle();
-
 	// instrumentation for the noinstr rcu_dynticks_eqs_enter()
 	instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
 	instrumentation_end();
@@ -872,7 +868,6 @@ static void noinstr rcu_eqs_exit(bool user)
 	// instrumentation for the noinstr rcu_dynticks_eqs_exit()
 	instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
 
-	rcu_cleanup_after_idle();
 	trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
 	WRITE_ONCE(rdp->dynticks_nesting, 1);
@@ -1014,12 +1009,6 @@ noinstr void rcu_nmi_enter(void)
 		rcu_dynticks_eqs_exit();
 		// ... but is watching here.
 
-		if (!in_nmi()) {
-			instrumentation_begin();
-			rcu_cleanup_after_idle();
-			instrumentation_end();
-		}
-
 		instrumentation_begin();
 		// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
 		instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
-- 
cgit v1.2.3-70-g09d2


From bc849e9192c75833a85f2e9376a265ab31f8eec7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 27 Sep 2021 14:30:20 -0700
Subject: rcu: Move rcu_needs_cpu() to tree.c

Now that RCU_FAST_NO_HZ is no more, there is but one implementation of
the rcu_needs_cpu() function.  This commit therefore moves this function
from kernel/rcu/tree_plugin.c to kernel/rcu/tree.c.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c        | 18 ++++++++++++++++++
 kernel/rcu/tree_plugin.h | 16 ----------------
 2 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5e98257c2910..4ac019e9b25f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1075,6 +1075,24 @@ void rcu_irq_enter_irqson(void)
 	local_irq_restore(flags);
 }
 
+/*
+ * Check to see if any future non-offloaded RCU-related work will need
+ * to be done by the current CPU, even if none need be done immediately,
+ * returning 1 if so.  This function is part of the RCU implementation;
+ * it is -not- an exported member of the RCU API.  This is used by
+ * the idle-entry code to figure out whether it is safe to disable the
+ * scheduler-clock interrupt.
+ *
+ * Just check whether or not this CPU has non-offloaded RCU callbacks
+ * queued.
+ */
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+{
+	*nextevt = KTIME_MAX;
+	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
+		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
+}
+
 /*
  * If any sort of urgency was applied to the current CPU (for example,
  * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 19f7d578cedb..0575757a0f8f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1251,22 +1251,6 @@ static void __init rcu_spawn_boost_kthreads(void)
 
 #endif /* #else #ifdef CONFIG_RCU_BOOST */
 
-/*
- * Check to see if any future non-offloaded RCU-related work will need
- * to be done by the current CPU, even if none need be done immediately,
- * returning 1 if so.  This function is part of the RCU implementation;
- * it is -not- an exported member of the RCU API.
- *
- * Just check whether or not this CPU has non-offloaded RCU callbacks
- * queued.
- */
-int rcu_needs_cpu(u64 basemono, u64 *nextevt)
-{
-	*nextevt = KTIME_MAX;
-	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
-		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
-}
-
 /*
  * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
  * grace-period kthread will do force_quiescent_state() processing?
-- 
cgit v1.2.3-70-g09d2


From 2407a64f8045552203ee5cb9904ce75ce2fceef4 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 28 Sep 2021 08:21:28 +0800
Subject: rcu: in_irq() cleanup

This commit replaces the obsolete and ambiguous macro in_irq() with its
shiny new in_hardirq() equivalent.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcutiny.h  | 2 +-
 kernel/rcu/tree.c        | 2 +-
 kernel/rcu/tree_plugin.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 9be015305f9f..858f4d429946 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -85,7 +85,7 @@ static inline void rcu_irq_enter_irqson(void) { }
 static inline void rcu_irq_exit(void) { }
 static inline void rcu_irq_exit_check_preempt(void) { }
 #define rcu_is_idle_cpu(cpu) \
-	(is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq())
+	(is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq())
 static inline void exit_rcu(void) { }
 static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ef8d36f580fc..f0f19dc7f19e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1467,7 +1467,7 @@ static void rcu_gp_kthread_wake(void)
 {
 	struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
 
-	if ((current == t && !in_irq() && !in_serving_softirq()) ||
+	if ((current == t && !in_hardirq() && !in_serving_softirq()) ||
 	    !READ_ONCE(rcu_state.gp_flags) || !t)
 		return;
 	WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 5199559fbbf0..599084c4c21f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -642,7 +642,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 			   (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
 			    t->rcu_blocked_node);
 		// Need to defer quiescent state until everything is enabled.
-		if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) {
+		if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) {
 			// Using softirq, safe to awaken, and either the
 			// wakeup is free or there is either an expedited
 			// GP in flight or a potential need to deboost.
-- 
cgit v1.2.3-70-g09d2


From 300c0c5e721834f484b03fa3062602dd8ff48413 Mon Sep 17 00:00:00 2001
From: Jun Miao <jun.miao@intel.com>
Date: Tue, 16 Nov 2021 07:23:02 +0800
Subject: rcu: Avoid alloc_pages() when recording stack

The default kasan_record_aux_stack() calls stack_depot_save() with GFP_NOWAIT,
which in turn can then call alloc_pages(GFP_NOWAIT, ...).  In general, however,
it is not even possible to use either GFP_ATOMIC nor GFP_NOWAIT in certain
non-preemptive contexts/RT kernel including raw_spin_locks (see gfp.h and ab00db216c9c7).
Fix it by instructing stackdepot to not expand stack storage via alloc_pages()
in case it runs out by using kasan_record_aux_stack_noalloc().

Jianwei Hu reported:
BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:969
in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 15319, name: python3
INFO: lockdep is turned off.
irq event stamp: 0
  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  hardirqs last disabled at (0): [<ffffffff856c8b13>] copy_process+0xaf3/0x2590
  softirqs last  enabled at (0): [<ffffffff856c8b13>] copy_process+0xaf3/0x2590
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  CPU: 6 PID: 15319 Comm: python3 Tainted: G        W  O 5.15-rc7-preempt-rt #1
  Hardware name: Supermicro SYS-E300-9A-8C/A2SDi-8C-HLN4F, BIOS 1.1b 12/17/2018
  Call Trace:
    show_stack+0x52/0x58
    dump_stack+0xa1/0xd6
    ___might_sleep.cold+0x11c/0x12d
    rt_spin_lock+0x3f/0xc0
    rmqueue+0x100/0x1460
    rmqueue+0x100/0x1460
    mark_usage+0x1a0/0x1a0
    ftrace_graph_ret_addr+0x2a/0xb0
    rmqueue_pcplist.constprop.0+0x6a0/0x6a0
     __kasan_check_read+0x11/0x20
     __zone_watermark_ok+0x114/0x270
     get_page_from_freelist+0x148/0x630
     is_module_text_address+0x32/0xa0
     __alloc_pages_nodemask+0x2f6/0x790
     __alloc_pages_slowpath.constprop.0+0x12d0/0x12d0
     create_prof_cpu_mask+0x30/0x30
     alloc_pages_current+0xb1/0x150
     stack_depot_save+0x39f/0x490
     kasan_save_stack+0x42/0x50
     kasan_save_stack+0x23/0x50
     kasan_record_aux_stack+0xa9/0xc0
     __call_rcu+0xff/0x9c0
     call_rcu+0xe/0x10
     put_object+0x53/0x70
     __delete_object+0x7b/0x90
     kmemleak_free+0x46/0x70
     slab_free_freelist_hook+0xb4/0x160
     kfree+0xe5/0x420
     kfree_const+0x17/0x30
     kobject_cleanup+0xaa/0x230
     kobject_put+0x76/0x90
     netdev_queue_update_kobjects+0x17d/0x1f0
     ... ...
     ksys_write+0xd9/0x180
     __x64_sys_write+0x42/0x50
     do_syscall_64+0x38/0x50
     entry_SYSCALL_64_after_hwframe+0x44/0xa9

Links: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/include/linux/kasan.h?id=7cb3007ce2da27ec02a1a3211941e7fe6875b642
Fixes: 84109ab58590 ("rcu: Record kvfree_call_rcu() call stack for KASAN")
Fixes: 26e760c9a7c8 ("rcu: kasan: record and print call_rcu() call stack")
Reported-by: Jianwei Hu <jianwei.hu@windriver.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Acked-by: Marco Elver <elver@google.com>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Jun Miao <jun.miao@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f0f19dc7f19e..c8beea8dd3a0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2982,7 +2982,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
 	head->func = func;
 	head->next = NULL;
 	local_irq_save(flags);
-	kasan_record_aux_stack(head);
+	kasan_record_aux_stack_noalloc(head);
 	rdp = this_cpu_ptr(&rcu_data);
 
 	/* Add the callback to our list. */
@@ -3547,7 +3547,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 		return;
 	}
 
-	kasan_record_aux_stack(ptr);
+	kasan_record_aux_stack_noalloc(ptr);
 	success = add_ptr_to_bulk_krc_lock(&krcp, &flags, ptr, !head);
 	if (!success) {
 		run_page_cache_worker(krcp);
-- 
cgit v1.2.3-70-g09d2


From 790da248978a0722d92d1471630c881704f7eb0d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 29 Sep 2021 11:09:34 -0700
Subject: rcu: Make idle entry report expedited quiescent states

In non-preemptible kernels, an unfortunately timed expedited grace period
can result in the rcu_exp_handler() IPI handler setting the rcu_data
structure's cpu_no_qs.b.exp field just as the target CPU enters idle.
There are situations in which this field will not be checked until after
that CPU exits idle.  The resulting grace-period latency does not qualify
as "expedited".

This commit therefore checks this field upon non-preemptible idle entry in
the rcu_preempt_deferred_qs() function.  It also qualifies the rcu_core()
preempt_count() check with IS_ENABLED(CONFIG_PREEMPT_COUNT) to prevent
false-positive quiescent states from count-free kernels.

Reported-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c        |  2 +-
 kernel/rcu/tree_plugin.h | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ef8d36f580fc..a8d1fe35f482 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2714,7 +2714,7 @@ static __latent_entropy void rcu_core(void)
 	WARN_ON_ONCE(!rdp->beenonline);
 
 	/* Report any deferred quiescent states if preemption enabled. */
-	if (!(preempt_count() & PREEMPT_MASK)) {
+	if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
 		rcu_preempt_deferred_qs(current);
 	} else if (rcu_preempt_need_deferred_qs(current)) {
 		set_tsk_need_resched(current);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e1a9fb96e0b9..8fb1612021a0 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -927,7 +927,18 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
 	return false;
 }
-static void rcu_preempt_deferred_qs(struct task_struct *t) { }
+
+// Except that we do need to respond to a request by an expedited grace
+// period for a quiescent state from this CPU.  Note that requests from
+// tasks are handled when removing the task from the blocked-tasks list
+// below.
+static void rcu_preempt_deferred_qs(struct task_struct *t)
+{
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+	if (rdp->cpu_no_qs.b.exp)
+		rcu_report_exp_rdp(rdp);
+}
 
 /*
  * Because there is no preemptible RCU, there can be no readers blocked,
-- 
cgit v1.2.3-70-g09d2


From 614ddad17f22a22e035e2ea37a04815f50362017 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 17 Sep 2021 15:04:48 -0700
Subject: rcu: Tighten rcu_advance_cbs_nowake() checks

Currently, rcu_advance_cbs_nowake() checks that a grace period is in
progress, however, that grace period could end just after the check.
This commit rechecks that a grace period is still in progress while
holding the rcu_node structure's lock.  The grace period cannot end while
the current CPU's rcu_node structure's ->lock is held, thus avoiding
false positives from the WARN_ON_ONCE().

As Daniel Vacek noted, it is not necessary for the rcu_node structure
to have a CPU that has not yet passed through its quiescent state.

Tested-by: Guillaume Morin <guillaume@morinfr.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ef8d36f580fc..8706b30c2ac8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1590,10 +1590,11 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
 						  struct rcu_data *rdp)
 {
 	rcu_lockdep_assert_cblist_protected(rdp);
-	if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
-	    !raw_spin_trylock_rcu_node(rnp))
+	if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
 		return;
-	WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
+	// The grace period cannot end while we hold the rcu_node lock.
+	if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
+		WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
 	raw_spin_unlock_rcu_node(rnp);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 213d56bf33bdda835bac04046f09256a75c5ca8e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:07 +0200
Subject: rcu/nocb: Prepare state machine for a new step

Currently SEGCBLIST_SOFTIRQ_ONLY is a bit of an exception among the
segcblist flags because it is an exclusive state that doesn't mix up
with the other flags. Remove it in favour of:

_ A flag specifying that rcu_core() needs to perform callbacks execution
  and acceleration

and

_ A flag specifying we want the nocb lock to be held in any needed
  circumstances

This clarifies the code and is more flexible: It allows to have a state
where rcu_core() runs with locking while offloading hasn't started yet.
This is a necessary step to prepare for triggering rcu_core() at the
very beginning of the de-offloading process so that rcu_core() won't
dismiss work while being preempted by the de-offloading process, at
least not without a pending subsequent rcu_core() that will quickly
catch up.

Reviewed-by: Valentin Schneider <Valentin.Schneider@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcu_segcblist.h | 37 +++++++++++++++++++++++--------------
 kernel/rcu/rcu_segcblist.c    |  6 +++---
 kernel/rcu/rcu_segcblist.h    | 12 +++++++-----
 kernel/rcu/tree.c             |  2 +-
 kernel/rcu/tree_nocb.h        | 24 ++++++++++++++++--------
 5 files changed, 50 insertions(+), 31 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 3db96c4f45fd..812961b1d064 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -69,7 +69,7 @@ struct rcu_cblist {
  *
  *
  *  ----------------------------------------------------------------------------
- *  |                         SEGCBLIST_SOFTIRQ_ONLY                           |
+ *  |                              SEGCBLIST_RCU_CORE                          |
  *  |                                                                          |
  *  |  Callbacks processed by rcu_core() from softirqs or local                |
  *  |  rcuc kthread, without holding nocb_lock.                                |
@@ -77,7 +77,7 @@ struct rcu_cblist {
  *                                         |
  *                                         v
  *  ----------------------------------------------------------------------------
- *  |                        SEGCBLIST_OFFLOADED                               |
+ *  |       SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED       |
  *  |                                                                          |
  *  | Callbacks processed by rcu_core() from softirqs or local                 |
  *  | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads,     |
@@ -89,7 +89,9 @@ struct rcu_cblist {
  *                        |                                 |
  *                        v                                 v
  *  ---------------------------------------  ----------------------------------|
- *  |        SEGCBLIST_OFFLOADED |        |  |     SEGCBLIST_OFFLOADED |       |
+ *  |        SEGCBLIST_RCU_CORE   |       |  |     SEGCBLIST_RCU_CORE   |      |
+ *  |        SEGCBLIST_LOCKING    |       |  |     SEGCBLIST_LOCKING    |      |
+ *  |        SEGCBLIST_OFFLOADED  |       |  |     SEGCBLIST_OFFLOADED  |      |
  *  |        SEGCBLIST_KTHREAD_CB         |  |     SEGCBLIST_KTHREAD_GP        |
  *  |                                     |  |                                 |
  *  |                                     |  |                                 |
@@ -104,9 +106,10 @@ struct rcu_cblist {
  *                                         |
  *                                         v
  *  |--------------------------------------------------------------------------|
- *  |                           SEGCBLIST_OFFLOADED |                          |
- *  |                           SEGCBLIST_KTHREAD_CB |                         |
- *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           SEGCBLIST_OFFLOADED  |                         |
+ *  |                           SEGCBLIST_KTHREAD_GP |                         |
+ *  |                           SEGCBLIST_KTHREAD_CB                           |
  *  |                                                                          |
  *  |   Kthreads handle callbacks holding nocb_lock, local rcu_core() stops    |
  *  |   handling callbacks. Enable bypass queueing.                            |
@@ -120,7 +123,8 @@ struct rcu_cblist {
  *
  *
  *  |--------------------------------------------------------------------------|
- *  |                           SEGCBLIST_OFFLOADED |                          |
+ *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           SEGCBLIST_OFFLOADED  |                         |
  *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
@@ -130,6 +134,8 @@ struct rcu_cblist {
  *                                      |
  *                                      v
  *  |--------------------------------------------------------------------------|
+ *  |                           SEGCBLIST_RCU_CORE   |                         |
+ *  |                           SEGCBLIST_LOCKING    |                         |
  *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
@@ -143,7 +149,9 @@ struct rcu_cblist {
  *                     |                                 |
  *                     v                                 v
  *  ---------------------------------------------------------------------------|
- *  |                                                                          |
+ *  |                                     |                                    |
+ *  |        SEGCBLIST_RCU_CORE |         |       SEGCBLIST_RCU_CORE |         |
+ *  |        SEGCBLIST_LOCKING  |         |       SEGCBLIST_LOCKING  |         |
  *  |        SEGCBLIST_KTHREAD_CB         |       SEGCBLIST_KTHREAD_GP         |
  *  |                                     |                                    |
  *  | GP kthread woke up and              |   CB kthread woke up and           |
@@ -159,7 +167,7 @@ struct rcu_cblist {
  *                                      |
  *                                      v
  *  ----------------------------------------------------------------------------
- *  |                                   0                                      |
+ *  |                SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING                    |
  *  |                                                                          |
  *  | Callbacks processed by rcu_core() from softirqs or local                 |
  *  | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed.    |
@@ -168,17 +176,18 @@ struct rcu_cblist {
  *                                      |
  *                                      v
  *  ----------------------------------------------------------------------------
- *  |                         SEGCBLIST_SOFTIRQ_ONLY                           |
+ *  |                         SEGCBLIST_RCU_CORE                               |
  *  |                                                                          |
  *  |  Callbacks processed by rcu_core() from softirqs or local                |
  *  |  rcuc kthread, without holding nocb_lock.                                |
  *  ----------------------------------------------------------------------------
  */
 #define SEGCBLIST_ENABLED	BIT(0)
-#define SEGCBLIST_SOFTIRQ_ONLY	BIT(1)
-#define SEGCBLIST_KTHREAD_CB	BIT(2)
-#define SEGCBLIST_KTHREAD_GP	BIT(3)
-#define SEGCBLIST_OFFLOADED	BIT(4)
+#define SEGCBLIST_RCU_CORE	BIT(1)
+#define SEGCBLIST_LOCKING	BIT(2)
+#define SEGCBLIST_KTHREAD_CB	BIT(3)
+#define SEGCBLIST_KTHREAD_GP	BIT(4)
+#define SEGCBLIST_OFFLOADED	BIT(5)
 
 struct rcu_segcblist {
 	struct rcu_head *head;
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index aaa111237b60..c07aab6e39ef 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -261,14 +261,14 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
 }
 
 /*
- * Mark the specified rcu_segcblist structure as offloaded.
+ * Mark the specified rcu_segcblist structure as offloaded (or not)
  */
 void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
 {
 	if (offload) {
-		rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY);
-		rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED);
+		rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED);
 	} else {
+		rcu_segcblist_set_flags(rsclp, SEGCBLIST_RCU_CORE);
 		rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
 	}
 }
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 9a19328ff251..e373fbe44da5 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -80,11 +80,14 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
 	return rcu_segcblist_test_flags(rsclp, SEGCBLIST_ENABLED);
 }
 
-/* Is the specified rcu_segcblist offloaded, or is SEGCBLIST_SOFTIRQ_ONLY set? */
+/*
+ * Is the specified rcu_segcblist NOCB offloaded (or in the middle of the
+ * [de]offloading process)?
+ */
 static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
 {
 	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
-	    !rcu_segcblist_test_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY))
+	    rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING))
 		return true;
 
 	return false;
@@ -92,9 +95,8 @@ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
 
 static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp)
 {
-	int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED;
-
-	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && (rsclp->flags & flags) == flags)
+	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+	    !rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE))
 		return true;
 
 	return false;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8706b30c2ac8..e905d7e4ddb9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -79,7 +79,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
 	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
 	.dynticks = ATOMIC_INIT(1),
 #ifdef CONFIG_RCU_NOCB_CPU
-	.cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
+	.cblist.flags = SEGCBLIST_RCU_CORE,
 #endif
 };
 static struct rcu_state rcu_state = {
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 368ef7b9af4f..b3e07d0bfbbf 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1000,12 +1000,12 @@ static long rcu_nocb_rdp_deoffload(void *arg)
 	 */
 	rcu_nocb_lock_irqsave(rdp, flags);
 	/*
-	 * Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb
+	 * Theoretically we could clear SEGCBLIST_LOCKING after the nocb
 	 * lock is released but how about being paranoid for once?
 	 */
-	rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
+	rcu_segcblist_clear_flags(cblist, SEGCBLIST_LOCKING);
 	/*
-	 * With SEGCBLIST_SOFTIRQ_ONLY, we can't use
+	 * Without SEGCBLIST_LOCKING, we can't use
 	 * rcu_nocb_unlock_irqrestore() anymore.
 	 */
 	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
@@ -1058,14 +1058,14 @@ static long rcu_nocb_rdp_offload(void *arg)
 
 	pr_info("Offloading %d\n", rdp->cpu);
 	/*
-	 * Can't use rcu_nocb_lock_irqsave() while we are in
-	 * SEGCBLIST_SOFTIRQ_ONLY mode.
+	 * Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING
+	 * is set.
 	 */
 	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
 
 	/*
 	 * We didn't take the nocb lock while working on the
-	 * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
+	 * rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode).
 	 * Every modifications that have been done previously on
 	 * rdp->cblist must be visible remotely by the nocb kthreads
 	 * upon wake up after reading the cblist flags.
@@ -1084,6 +1084,14 @@ static long rcu_nocb_rdp_offload(void *arg)
 			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
 			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
 
+	/*
+	 * All kthreads are ready to work, we can finally relieve rcu_core() and
+	 * enable nocb bypass.
+	 */
+	rcu_nocb_lock_irqsave(rdp, flags);
+	rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
+	rcu_nocb_unlock_irqrestore(rdp, flags);
+
 	return ret;
 }
 
@@ -1154,8 +1162,8 @@ void __init rcu_init_nohz(void)
 		if (rcu_segcblist_empty(&rdp->cblist))
 			rcu_segcblist_init(&rdp->cblist);
 		rcu_segcblist_offload(&rdp->cblist, true);
-		rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
-		rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
+		rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
+		rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
 	}
 	rcu_organize_nocb_kthreads();
 }
-- 
cgit v1.2.3-70-g09d2


From fbb94cbd70d41c7511460896dfc7f9ea5da704b3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:08 +0200
Subject: rcu/nocb: Invoke rcu_core() at the start of deoffloading

On PREEMPT_RT, if rcu_core() is preempted by the de-offloading process,
some work, such as callbacks acceleration and invocation, may be left
unattended due to the volatile checks on the offloaded state.

In the worst case this work is postponed until the next rcu_pending()
check that can take a jiffy to reach, which can be a problem in case
of callbacks flooding.

Solve that with invoking rcu_core() early in the de-offloading process.
This way any work dismissed by an ongoing rcu_core() call fooled by
a preempting deoffloading process will be caught up by a nearby future
recall to rcu_core(), this time fully aware of the de-offloading state.

Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcu_segcblist.h | 14 ++++++++++++++
 kernel/rcu/rcu_segcblist.c    |  6 ++----
 kernel/rcu/tree.c             | 17 +++++++++++++++++
 kernel/rcu/tree_nocb.h        |  9 +++++++++
 4 files changed, 42 insertions(+), 4 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 812961b1d064..659d13a7ddaa 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -136,6 +136,20 @@ struct rcu_cblist {
  *  |--------------------------------------------------------------------------|
  *  |                           SEGCBLIST_RCU_CORE   |                         |
  *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           SEGCBLIST_OFFLOADED  |                         |
+ *  |                           SEGCBLIST_KTHREAD_CB |                         |
+ *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                                                                          |
+ *  |   CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core()    |
+ *  |   handles callbacks concurrently. Bypass enqueue is enabled.             |
+ *  |   Invoke RCU core so we make sure not to preempt it in the middle with   |
+ *  |   leaving some urgent work unattended within a jiffy.                    |
+ *  ----------------------------------------------------------------------------
+ *                                      |
+ *                                      v
+ *  |--------------------------------------------------------------------------|
+ *  |                           SEGCBLIST_RCU_CORE   |                         |
+ *  |                           SEGCBLIST_LOCKING    |                         |
  *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index c07aab6e39ef..81145c3ece25 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -265,12 +265,10 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
  */
 void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
 {
-	if (offload) {
+	if (offload)
 		rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED);
-	} else {
-		rcu_segcblist_set_flags(rsclp, SEGCBLIST_RCU_CORE);
+	else
 		rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
-	}
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e905d7e4ddb9..a329adfece86 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2707,6 +2707,23 @@ static __latent_entropy void rcu_core(void)
 	unsigned long flags;
 	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
 	struct rcu_node *rnp = rdp->mynode;
+	/*
+	 * On RT rcu_core() can be preempted when IRQs aren't disabled.
+	 * Therefore this function can race with concurrent NOCB (de-)offloading
+	 * on this CPU and the below condition must be considered volatile.
+	 * However if we race with:
+	 *
+	 * _ Offloading:   In the worst case we accelerate or process callbacks
+	 *                 concurrently with NOCB kthreads. We are guaranteed to
+	 *                 call rcu_nocb_lock() if that happens.
+	 *
+	 * _ Deoffloading: In the worst case we miss callbacks acceleration or
+	 *                 processing. This is fine because the early stage
+	 *                 of deoffloading invokes rcu_core() after setting
+	 *                 SEGCBLIST_RCU_CORE. So we guarantee that we'll process
+	 *                 what could have been dismissed without the need to wait
+	 *                 for the next rcu_pending() check in the next jiffy.
+	 */
 	const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
 
 	if (cpu_is_offline(smp_processor_id()))
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index b3e07d0bfbbf..2461fe8d0c23 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -990,6 +990,15 @@ static long rcu_nocb_rdp_deoffload(void *arg)
 	 * will refuse to put anything into the bypass.
 	 */
 	WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+	/*
+	 * Start with invoking rcu_core() early. This way if the current thread
+	 * happens to preempt an ongoing call to rcu_core() in the middle,
+	 * leaving some work dismissed because rcu_core() still thinks the rdp is
+	 * completely offloaded, we are guaranteed a nearby future instance of
+	 * rcu_core() to catch up.
+	 */
+	rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
+	invoke_rcu_core();
 	ret = rdp_offload_toggle(rdp, false, flags);
 	swait_event_exclusive(rdp->nocb_state_wq,
 			      !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
-- 
cgit v1.2.3-70-g09d2


From 24ee940d89277602147ce1b8b4fd87b01b9a6660 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 Oct 2021 02:08:09 +0200
Subject: rcu/nocb: Make rcu_core() callbacks acceleration preempt-safe

While reporting a quiescent state for a given CPU, rcu_core() takes
advantage of the freshly loaded grace period sequence number and the
locked rnp to accelerate the callbacks whose sequence number have been
assigned a stale value.

This action is only necessary when the rdp isn't offloaded, otherwise
the NOCB kthreads already take care of the callbacks progression.

However the check for the offloaded state is volatile because it is
performed outside the IRQs disabled section. It's possible for the
offloading process to preempt rcu_core() at that point on PREEMPT_RT.

This is dangerous because rcu_core() may end up accelerating callbacks
concurrently with NOCB kthreads without appropriate locking.

Fix this with moving the offloaded check inside the rnp locking section.

Reported-and-tested-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a329adfece86..5985698f3341 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2278,7 +2278,6 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
 	unsigned long flags;
 	unsigned long mask;
 	bool needwake = false;
-	const bool offloaded = rcu_rdp_is_offloaded(rdp);
 	struct rcu_node *rnp;
 
 	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2305,8 +2304,10 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
 		/*
 		 * This GP can't end until cpu checks in, so all of our
 		 * callbacks can be processed during the next GP.
+		 *
+		 * NOCB kthreads have their own way to deal with that.
 		 */
-		if (!offloaded)
+		if (!rcu_rdp_is_offloaded(rdp))
 			needwake = rcu_accelerate_cbs(rnp, rdp);
 
 		rcu_disable_urgency_upon_qs(rdp);
-- 
cgit v1.2.3-70-g09d2


From b3bb02fe5a2b538ae53eda1fe591dd6c81a91ad4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:10 +0200
Subject: rcu/nocb: Make rcu_core() callbacks acceleration (de-)offloading safe

When callbacks are offloaded, the NOCB kthreads handle the callbacks
progression on behalf of rcu_core().

However during the (de-)offloading process, the kthread may not be
entirely up to the task. As a result some callbacks grace period
sequence number may remain stale for a while because rcu_core() won't
take care of them either.

Fix this with forcing callbacks acceleration from rcu_core() as long
as the offloading process isn't complete.

Reported-and-tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5985698f3341..cb9abb80377f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2278,6 +2278,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
 	unsigned long flags;
 	unsigned long mask;
 	bool needwake = false;
+	bool needacc = false;
 	struct rcu_node *rnp;
 
 	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2305,16 +2306,29 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
 		 * This GP can't end until cpu checks in, so all of our
 		 * callbacks can be processed during the next GP.
 		 *
-		 * NOCB kthreads have their own way to deal with that.
+		 * NOCB kthreads have their own way to deal with that...
 		 */
-		if (!rcu_rdp_is_offloaded(rdp))
+		if (!rcu_rdp_is_offloaded(rdp)) {
 			needwake = rcu_accelerate_cbs(rnp, rdp);
+		} else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
+			/*
+			 * ...but NOCB kthreads may miss or delay callbacks acceleration
+			 * if in the middle of a (de-)offloading process.
+			 */
+			needacc = true;
+		}
 
 		rcu_disable_urgency_upon_qs(rdp);
 		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
 		/* ^^^ Released rnp->lock */
 		if (needwake)
 			rcu_gp_kthread_wake();
+
+		if (needacc) {
+			rcu_nocb_lock_irqsave(rdp, flags);
+			rcu_accelerate_cbs_unlocked(rnp, rdp);
+			rcu_nocb_unlock_irqrestore(rdp, flags);
+		}
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 344e219d7d2b28117daaae5fe8da2e054b53d5a2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:11 +0200
Subject: rcu/nocb: Check a stable offloaded state to manipulate
 qlen_last_fqs_check

It's not entirely obvious why rdp->qlen_last_fqs_check is updated before
processing the queue only on offloaded rdp. There can be different
effect to that, either in favour of triggering the force quiescent state
path or not. For example:

1) If the number of callbacks has decreased since the last
   rdp->qlen_last_fqs_check update (because we recently called
   rcu_do_batch() and we executed below qhimark callbacks) and the number
   of processed callbacks on a subsequent do_batch() arranges for
   exceeding qhimark on non-offloaded but not on offloaded setup, then we
   may spare a later run to the force quiescent state
   slow path on __call_rcu_nocb_wake(), as compared to the non-offloaded
   counterpart scenario.

   Here is such an offloaded scenario instance:

    qhimark = 1000
    rdp->last_qlen_last_fqs_check = 3000
    rcu_segcblist_n_cbs(rdp) = 2000

    rcu_do_batch() {
        if (offloaded)
            rdp->last_qlen_fqs_check = rcu_segcblist_n_cbs(rdp) // 2000
        // run 1000 callback
        rcu_segcblist_n_cbs(rdp) = 1000
        // Not updating rdp->qlen_last_fqs_check
        if (count < rdp->qlen_last_fqs_check - qhimark)
            rdp->qlen_last_fqs_check = count;
    }

    call_rcu() * 1001 {
        __call_rcu_nocb_wake() {
            // not taking the fqs slowpath:
            // rcu_segcblist_n_cbs(rdp) == 2001
            // rdp->qlen_last_fqs_check == 2000
            // qhimark == 1000
            if (len > rdp->qlen_last_fqs_check + qhimark)
                ...
    }

    In the case of a non-offloaded scenario, rdp->qlen_last_fqs_check
    would be 1000 and the fqs slowpath would have executed.

2) If the number of callbacks has increased since the last
   rdp->qlen_last_fqs_check update (because we recently queued below
   qhimark callbacks) and the number of callbacks executed in rcu_do_batch()
   doesn't exceed qhimark for either offloaded or non-offloaded setup,
   then it's possible that the offloaded scenario later run the force
   quiescent state slow path on __call_rcu_nocb_wake() while the
   non-offloaded doesn't.

    qhimark = 1000
    rdp->last_qlen_last_fqs_check = 3000
    rcu_segcblist_n_cbs(rdp) = 2000

    rcu_do_batch() {
        if (offloaded)
            rdp->last_qlen_last_fqs_check = rcu_segcblist_n_cbs(rdp) // 2000
        // run 100 callbacks
        // concurrent queued 100
        rcu_segcblist_n_cbs(rdp) = 2000
        // Not updating rdp->qlen_last_fqs_check
        if (count < rdp->qlen_last_fqs_check - qhimark)
            rdp->qlen_last_fqs_check = count;
    }

    call_rcu() * 1001 {
        __call_rcu_nocb_wake() {
            // Taking the fqs slowpath:
            // rcu_segcblist_n_cbs(rdp) == 3001
            // rdp->qlen_last_fqs_check == 2000
            // qhimark == 1000
            if (len > rdp->qlen_last_fqs_check + qhimark)
                ...
    }

    In the case of a non-offloaded scenario, rdp->qlen_last_fqs_check
    would be 3000 and the fqs slowpath would have executed.

The reason for updating rdp->qlen_last_fqs_check when invoking callbacks
for offloaded CPUs is that there is usually no point in waking up either
the rcuog or rcuoc kthreads while in this state.  After all, both threads
are prohibited from indefinite sleeps.

The exception is when some huge number of callbacks are enqueued while
rcu_do_batch() is in the midst of invoking, in which case interrupting
the rcuog kthread's timed sleep might get more callbacks set up for the
next grace period.

Reported-and-tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Original-patch-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index cb9abb80377f..4cbfc4e4fa9e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2498,7 +2498,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	trace_rcu_batch_start(rcu_state.name,
 			      rcu_segcblist_n_cbs(&rdp->cblist), bl);
 	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
-	if (offloaded)
+	if (rcu_rdp_is_offloaded(rdp))
 		rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
 
 	trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
-- 
cgit v1.2.3-70-g09d2


From 7b65dfa32dca1be0400d43a3d5bb80ed6e04958e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:12 +0200
Subject: rcu/nocb: Use appropriate rcu_nocb_lock_irqsave()

Instead of hardcoding IRQ save and nocb lock, use the consolidated
API (and fix a comment as per Valentin Schneider's suggestion).

Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4cbfc4e4fa9e..20587d035d03 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2478,12 +2478,11 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	}
 
 	/*
-	 * Extract the list of ready callbacks, disabling to prevent
+	 * Extract the list of ready callbacks, disabling IRQs to prevent
 	 * races with call_rcu() from interrupt handlers.  Leave the
 	 * callback counts, as rcu_barrier() needs to be conservative.
 	 */
-	local_irq_save(flags);
-	rcu_nocb_lock(rdp);
+	rcu_nocb_lock_irqsave(rdp, flags);
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	pending = rcu_segcblist_n_cbs(&rdp->cblist);
 	div = READ_ONCE(rcu_divisor);
@@ -2546,8 +2545,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		}
 	}
 
-	local_irq_save(flags);
-	rcu_nocb_lock(rdp);
+	rcu_nocb_lock_irqsave(rdp, flags);
 	rdp->n_cbs_invoked += count;
 	trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
 			    is_idle_task(current), rcu_is_callbacks_kthread());
-- 
cgit v1.2.3-70-g09d2


From 78ad37a2c50dfdb9a60e42bb9ee1da86d1fe770c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:13 +0200
Subject: rcu/nocb: Limit number of softirq callbacks only on softirq

The current condition to limit the number of callbacks executed in a
row checks the offloaded state of the rdp. Not only is it volatile
but it is also misleading: the rcu_core() may well be executing
callbacks concurrently with NOCB kthreads, and the offloaded state
would then be verified on both cases. As a result the limit would
spuriously not apply anymore on softirq while in the middle of
(de-)offloading process.

Fix and clarify the condition with those constraints in mind:

_ If callbacks are processed either by rcuc or NOCB kthread, the call
  to cond_resched_tasks_rcu_qs() is enough to take care of the overload.

_ If instead callbacks are processed by softirqs:
  * If need_resched(), exit the callbacks processing
  * Otherwise if CPU is idle we can continue
  * Otherwise exit because a softirq shouldn't interrupt a task for too
    long nor deprive other pending softirq vectors of the CPU.

Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 20587d035d03..4f4d9ea6a9cb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2525,9 +2525,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		/*
 		 * Stop only if limit reached and CPU has something to do.
 		 */
-		if (count >= bl && !offloaded &&
-		    (need_resched() ||
-		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
+		if (count >= bl && in_serving_softirq() &&
+		    (need_resched() || !is_idle_task(current)))
 			break;
 		if (unlikely(tlimit)) {
 			/* only call local_clock() every 32 callbacks */
-- 
cgit v1.2.3-70-g09d2


From 3e61e95e2d095e308616cba4ffb640f95a480e01 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:14 +0200
Subject: rcu: Fix callbacks processing time limit retaining cond_resched()

The callbacks processing time limit makes sure we are not exceeding a
given amount of time executing the queue.

However its "continue" clause bypasses the cond_resched() call on
rcuc and NOCB kthreads, delaying it until we reach the limit, which can
be very long...

Make sure the scheduler has a higher priority than the time limit.

Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4f4d9ea6a9cb..74e210ccfd38 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2525,9 +2525,21 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		/*
 		 * Stop only if limit reached and CPU has something to do.
 		 */
-		if (count >= bl && in_serving_softirq() &&
-		    (need_resched() || !is_idle_task(current)))
-			break;
+		if (in_serving_softirq()) {
+			if (count >= bl && (need_resched() || !is_idle_task(current)))
+				break;
+		} else {
+			local_bh_enable();
+			lockdep_assert_irqs_enabled();
+			cond_resched_tasks_rcu_qs();
+			lockdep_assert_irqs_enabled();
+			local_bh_disable();
+		}
+
+		/*
+		 * Make sure we don't spend too much time here and deprive other
+		 * softirq vectors of CPU cycles.
+		 */
 		if (unlikely(tlimit)) {
 			/* only call local_clock() every 32 callbacks */
 			if (likely((count & 31) || local_clock() < tlimit))
@@ -2535,13 +2547,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
 			/* Exceeded the time limit, so leave. */
 			break;
 		}
-		if (!in_serving_softirq()) {
-			local_bh_enable();
-			lockdep_assert_irqs_enabled();
-			cond_resched_tasks_rcu_qs();
-			lockdep_assert_irqs_enabled();
-			local_bh_disable();
-		}
 	}
 
 	rcu_nocb_lock_irqsave(rdp, flags);
-- 
cgit v1.2.3-70-g09d2


From a554ba288845fd3f6f12311fd76a51694233458a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:15 +0200
Subject: rcu: Apply callbacks processing time limit only on softirq

Time limit only makes sense when callbacks are serviced in softirq mode
because:

_ In case we need to get back to the scheduler,
  cond_resched_tasks_rcu_qs() is called after each callback.

_ In case some other softirq vector needs the CPU, the call to
  local_bh_enable() before cond_resched_tasks_rcu_qs() takes care about
  them via a call to do_softirq().

Therefore, make sure the time limit only applies to softirq mode.

Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 74e210ccfd38..1b31fad9f1a0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2488,7 +2488,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	div = READ_ONCE(rcu_divisor);
 	div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
 	bl = max(rdp->blimit, pending >> div);
-	if (unlikely(bl > 100)) {
+	if (in_serving_softirq() && unlikely(bl > 100)) {
 		long rrn = READ_ONCE(rcu_resched_ns);
 
 		rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
@@ -2528,6 +2528,17 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		if (in_serving_softirq()) {
 			if (count >= bl && (need_resched() || !is_idle_task(current)))
 				break;
+			/*
+			 * Make sure we don't spend too much time here and deprive other
+			 * softirq vectors of CPU cycles.
+			 */
+			if (unlikely(tlimit)) {
+				/* only call local_clock() every 32 callbacks */
+				if (likely((count & 31) || local_clock() < tlimit))
+					continue;
+				/* Exceeded the time limit, so leave. */
+				break;
+			}
 		} else {
 			local_bh_enable();
 			lockdep_assert_irqs_enabled();
@@ -2535,18 +2546,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
 			lockdep_assert_irqs_enabled();
 			local_bh_disable();
 		}
-
-		/*
-		 * Make sure we don't spend too much time here and deprive other
-		 * softirq vectors of CPU cycles.
-		 */
-		if (unlikely(tlimit)) {
-			/* only call local_clock() every 32 callbacks */
-			if (likely((count & 31) || local_clock() < tlimit))
-				continue;
-			/* Exceeded the time limit, so leave. */
-			break;
-		}
 	}
 
 	rcu_nocb_lock_irqsave(rdp, flags);
-- 
cgit v1.2.3-70-g09d2


From 0598a4d4429c0a952ac0e99e5280354cf4ccc01c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 19 Oct 2021 02:08:16 +0200
Subject: rcu/nocb: Don't invoke local rcu core on callback overload from nocb
 kthread

rcu_core() tries to ensure that its self-invocation in case of callbacks
overload only happen in softirq/rcuc mode. Indeed it doesn't make sense
to trigger local RCU core from nocb_cb kthread since it can execute
on a CPU different from the target rdp. Also in case of overload, the
nocb_cb kthread simply iterates a new loop of callbacks processing.

However the "offloaded" check that aims at preventing misplaced
rcu_core() invocations is wrong. First of all that state is volatile
and second: softirq/rcuc can execute while the target rdp is offloaded.
As a result rcu_core() can be invoked on the wrong CPU while in the
process of (de-)offloading.

Fix that with moving the rcu_core() self-invocation to rcu_core() itself,
irrespective of the rdp offloaded state.

Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b31fad9f1a0..03c038f324a0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2460,7 +2460,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	int div;
 	bool __maybe_unused empty;
 	unsigned long flags;
-	const bool offloaded = rcu_rdp_is_offloaded(rdp);
 	struct rcu_head *rhp;
 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
 	long bl, count = 0;
@@ -2582,9 +2581,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
 
 	rcu_nocb_unlock_irqrestore(rdp, flags);
 
-	/* Re-invoke RCU core processing if there are callbacks remaining. */
-	if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist))
-		invoke_rcu_core();
 	tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
 }
 
@@ -2771,8 +2767,12 @@ static __latent_entropy void rcu_core(void)
 
 	/* If there are callbacks ready, invoke them. */
 	if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
-	    likely(READ_ONCE(rcu_scheduler_fully_active)))
+	    likely(READ_ONCE(rcu_scheduler_fully_active))) {
 		rcu_do_batch(rdp);
+		/* Re-invoke RCU core processing if there are callbacks remaining. */
+		if (rcu_segcblist_ready_cbs(&rdp->cblist))
+			invoke_rcu_core();
+	}
 
 	/* Do any needed deferred wakeups of rcuo kthreads. */
 	do_nocb_deferred_wakeup(rdp);
-- 
cgit v1.2.3-70-g09d2