summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2022-10-18 10:27:02 -0700
committerAlexei Starovoitov <ast@kernel.org>2022-10-18 10:27:05 -0700
commit79d878f7ad8e8c68327107d76c25fb4d539c3c99 (patch)
treee2b8022025e270d1f39a04f592dbdd468be80f94
parent62c69e89e81bfbdb9a87ae3e0599dcc6aacf786b (diff)
parent4835f9ee980c1867584018e69cbf1f62d7844cb3 (diff)
Merge branch 'Remove unnecessary RCU grace period chaining'
Hou Tao says: ==================== Now bpf uses RCU grace period chaining to wait for the completion of access from both sleepable and non-sleepable bpf program: calling call_rcu_tasks_trace() firstly to wait for a RCU-tasks-trace grace period, then in its callback calls call_rcu() or kfree_rcu() to wait for a normal RCU grace period. According to the implementation of RCU Tasks Trace, it inovkes ->postscan_func() to wait for one RCU-tasks-trace grace period and rcu_tasks_trace_postscan() inovkes synchronize_rcu() to wait for one normal RCU grace period in turn, so one RCU-tasks-trace grace period will imply one normal RCU grace period. To codify the implication, introduces rcu_trace_implies_rcu_gp() in patch #1. And using it in patch Other two uses of call_rcu_tasks_trace() are unchanged: for __bpf_prog_put_rcu() there is no gp chain and for __bpf_tramp_image_put_rcu_tasks() it chains RCU tasks trace GP and RCU tasks GP. An alternative way to remove these unnecessary RCU grace period chainings is using the RCU polling API to check whether or not a normal RCU grace period has passed (e.g. get_state_synchronize_rcu()). But it needs an unsigned long space for each free element or each call, and it is not affordable for local storage element, so as for now always rcu_trace_implies_rcu_gp(). Comments are always welcome. Change Log: v2: * codify the implication of RCU Tasks Trace grace period instead of assuming for it v1: https://lore.kernel.org/bpf/20221011071128.3470622-1-houtao@huaweicloud.com Hou Tao (3): bpf: Use rcu_trace_implies_rcu_gp() in bpf memory allocator bpf: Use rcu_trace_implies_rcu_gp() in local storage map bpf: Use rcu_trace_implies_rcu_gp() for program array freeing ==================== Reviewed-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/rcupdate.h12
-rw-r--r--kernel/bpf/bpf_local_storage.c13
-rw-r--r--kernel/bpf/core.c8
-rw-r--r--kernel/bpf/memalloc.c15
-rw-r--r--kernel/rcu/tasks.h2
5 files changed, 42 insertions, 8 deletions
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 08605ce7379d..8822f06e4b40 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -241,6 +241,18 @@ static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
/**
+ * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period?
+ *
+ * As an accident of implementation, an RCU Tasks Trace grace period also
+ * acts as an RCU grace period. However, this could change at any time.
+ * Code relying on this accident must call this function to verify that
+ * this accident is still happening.
+ *
+ * You have been warned!
+ */
+static inline bool rcu_trace_implies_rcu_gp(void) { return true; }
+
+/**
* cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
*
* This macro resembles cond_resched(), except that it is defined to
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 802fc15b0d73..9dc6de1cf185 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -88,8 +88,14 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;
+ /* If RCU Tasks Trace grace period implies RCU grace period, do
+ * kfree(), else do kfree_rcu().
+ */
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
- kfree_rcu(local_storage, rcu);
+ if (rcu_trace_implies_rcu_gp())
+ kfree(local_storage);
+ else
+ kfree_rcu(local_storage, rcu);
}
static void bpf_selem_free_rcu(struct rcu_head *rcu)
@@ -97,7 +103,10 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
struct bpf_local_storage_elem *selem;
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
- kfree_rcu(selem, rcu);
+ if (rcu_trace_implies_rcu_gp())
+ kfree(selem);
+ else
+ kfree_rcu(selem, rcu);
}
/* local_storage->lock must be held and selem->local_storage == local_storage.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 711fd293b6de..4bc5f46d7030 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2251,8 +2251,14 @@ static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
{
struct bpf_prog_array *progs;
+ /* If RCU Tasks Trace grace period implies RCU grace period, there is
+ * no need to call kfree_rcu(), just call kfree() directly.
+ */
progs = container_of(rcu, struct bpf_prog_array, rcu);
- kfree_rcu(progs, rcu);
+ if (rcu_trace_implies_rcu_gp())
+ kfree(progs);
+ else
+ kfree_rcu(progs, rcu);
}
void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5f83be1d2018..2433be58bb85 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -222,9 +222,13 @@ static void __free_rcu(struct rcu_head *head)
static void __free_rcu_tasks_trace(struct rcu_head *head)
{
- struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
-
- call_rcu(&c->rcu, __free_rcu);
+ /* If RCU Tasks Trace grace period implies RCU grace period,
+ * there is no need to invoke call_rcu().
+ */
+ if (rcu_trace_implies_rcu_gp())
+ __free_rcu(head);
+ else
+ call_rcu(head, __free_rcu);
}
static void enque_to_free(struct bpf_mem_cache *c, void *obj)
@@ -253,8 +257,9 @@ static void do_call_rcu(struct bpf_mem_cache *c)
*/
__llist_add(llnode, &c->waiting_for_gp);
/* Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
- * Then use call_rcu() to wait for normal progs to finish
- * and finally do free_one() on each element.
+ * If RCU Tasks Trace grace period implies RCU grace period, free
+ * these elements directly, else use call_rcu() to wait for normal
+ * progs to finish and finally do free_one() on each element.
*/
call_rcu_tasks_trace(&c->rcu, __free_rcu_tasks_trace);
}
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index f5bf6fb430da..9435e5a7b53e 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1535,6 +1535,8 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
{
// Wait for late-stage exiting tasks to finish exiting.
// These might have passed the call to exit_tasks_rcu_finish().
+
+ // If you remove the following line, update rcu_trace_implies_rcu_gp()!!!
synchronize_rcu();
// Any tasks that exit after this point will set
// TRC_NEED_QS_CHECKED in ->trc_reader_special.b.need_qs.