summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/bpf/bench.c
diff options
context:
space:
mode:
authorDave Marchevsky <davemarchevsky@fb.com>2022-07-05 12:00:18 -0700
committerDaniel Borkmann <daniel@iogearbox.net>2022-07-07 16:35:21 +0200
commit2b4b2621fd6401865b31b9f403e4b936b7439e94 (patch)
treefa77994c04a9e5317a655a39f7f11cfbf64ee493 /tools/testing/selftests/bpf/bench.c
parent935dc35c75318fa213d26808ad8bb130fb0b486e (diff)
selftests/bpf: Add benchmark for local_storage RCU Tasks Trace usage
This benchmark measures grace period latency and kthread cpu usage of RCU Tasks Trace when many processes are creating/deleting BPF local_storage. Intent here is to quantify improvement on these metrics after Paul's recent RCU Tasks patches [0]. Specifically, fork 15k tasks which call a bpf prog that creates/destroys task local_storage and sleep in a loop, resulting in many call_rcu_tasks_trace calls. To determine grace period latency, trace time elapsed between rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp; for cpu usage look at rcu_task_trace_kthread's stime in /proc/PID/stat. On my virtualized test environment (Skylake, 8 cpus) benchmark results demonstrate significant improvement: BEFORE Paul's patches: SUMMARY tasks_trace grace period latency avg 22298.551 us stddev 1302.165 us SUMMARY ticks per tasks_trace grace period avg 2.291 stddev 0.324 AFTER Paul's patches: SUMMARY tasks_trace grace period latency avg 16969.197 us stddev 2525.053 us SUMMARY ticks per tasks_trace grace period avg 1.146 stddev 0.178 Note that since these patches are not in bpf-next benchmarking was done by cherry-picking this patch onto rcu tree. [0] https://lore.kernel.org/rcu/20220620225402.GA3842369@paulmck-ThinkPad-P17-Gen-1/ Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Paul E. McKenney <paulmck@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20220705190018.3239050-1-davemarchevsky@fb.com
Diffstat (limited to 'tools/testing/selftests/bpf/bench.c')
-rw-r--r--tools/testing/selftests/bpf/bench.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 1e7b5d4b1f11..c1f20a147462 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
}
+void
+grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
+void
+grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
void hits_drops_report_final(struct bench_res res[], int res_cnt)
{
int i;
@@ -236,6 +273,7 @@ extern struct argp bench_ringbufs_argp;
extern struct argp bench_bloom_map_argp;
extern struct argp bench_bpf_loop_argp;
extern struct argp bench_local_storage_argp;
+extern struct argp bench_local_storage_rcu_tasks_trace_argp;
extern struct argp bench_strncmp_argp;
static const struct argp_child bench_parsers[] = {
@@ -244,6 +282,8 @@ static const struct argp_child bench_parsers[] = {
{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
{ &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
+ { &bench_local_storage_rcu_tasks_trace_argp, 0,
+ "local_storage RCU Tasks Trace slowdown benchmark", 0 },
{},
};
@@ -449,6 +489,7 @@ extern const struct bench bench_bpf_hashmap_full_update;
extern const struct bench bench_local_storage_cache_seq_get;
extern const struct bench bench_local_storage_cache_interleaved_get;
extern const struct bench bench_local_storage_cache_hashmap_control;
+extern const struct bench bench_local_storage_tasks_trace;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -487,6 +528,7 @@ static const struct bench *benchs[] = {
&bench_local_storage_cache_seq_get,
&bench_local_storage_cache_interleaved_get,
&bench_local_storage_cache_hashmap_control,
+ &bench_local_storage_tasks_trace,
};
static void setup_benchmark()