From 42602e3a06f8e5b9a059344e305c9bee2dcc87c8 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 29 Oct 2024 15:46:27 +0800 Subject: bpf: handle implicit declaration of function gettid in bpf_iter.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we can see from the title, when I compiled the selftests/bpf, I saw the error: implicit declaration of function ‘gettid’ ; did you mean ‘getgid’? [-Werror=implicit-function-declaration] skel->bss->tid = gettid(); ^~~~~~ getgid Directly call the syscall solves this issue. Signed-off-by: Jason Xing Reviewed-by: Alan Maguire Tested-by: Alan Maguire Link: https://lore.kernel.org/r/20241029074627.80289-1-kerneljasonxing@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing/selftests/bpf/prog_tests/bpf_iter.c') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index f0a3a9c18e9e..9006549a1294 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -226,7 +226,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), "pthread_create"); - skel->bss->tid = gettid(); + skel->bss->tid = syscall(SYS_gettid); do_dummy_read_opts(skel->progs.dump_task, opts); @@ -255,10 +255,10 @@ static void *run_test_task_tid(void *arg) union bpf_iter_link_info linfo; int num_unknown_tid, num_known_tid; - ASSERT_NEQ(getpid(), gettid(), "check_new_thread_id"); + ASSERT_NEQ(getpid(), syscall(SYS_gettid), "check_new_thread_id"); memset(&linfo, 0, sizeof(linfo)); - linfo.task.tid = gettid(); + linfo.task.tid = syscall(SYS_gettid); opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); test_task_common(&opts, 0, 1); -- cgit v1.2.3-70-g09d2 From 0e2fb011a0ba8e2258ce776fdf89fbd589c2a3a6 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 4 Nov 2024 09:19:58 -0800 Subject: selftests/bpf: Clean up open-coded gettid syscall invocations Availability of the gettid definition across glibc versions supported by BPF selftests is not certain. Currently, all users in the tree open-code syscall to gettid. Convert them to a common macro definition. Reviewed-by: Jiri Olsa Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241104171959.2938862-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/benchs/bench_trigger.c | 3 ++- tools/testing/selftests/bpf/bpf_util.h | 9 +++++++++ tools/testing/selftests/bpf/map_tests/task_storage_map.c | 3 ++- tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 6 +++--- tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c | 10 +++++----- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 2 +- tools/testing/selftests/bpf/prog_tests/linked_funcs.c | 2 +- tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c | 2 +- tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/task_local_storage.c | 10 +++++----- tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c | 2 +- 12 files changed, 33 insertions(+), 22 deletions(-) (limited to 'tools/testing/selftests/bpf/prog_tests/bpf_iter.c') diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 2ed0ef6f21ee..32e9f194d449 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -4,6 +4,7 @@ #include #include #include +#include "bpf_util.h" #include "bench.h" #include "trigger_bench.skel.h" #include "trace_helpers.h" @@ -72,7 +73,7 @@ static __always_inline void inc_counter(struct counter *counters) unsigned slot; if (unlikely(tid == 0)) - tid = syscall(SYS_gettid); + tid = sys_gettid(); /* multiplicative hashing, it's fast */ slot = 2654435769U * tid; diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 10587a29b967..feff92219e21 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -6,6 +6,7 @@ #include #include #include +#include #include /* libbpf_num_possible_cpus */ static inline unsigned int bpf_num_possible_cpus(void) @@ -59,4 +60,12 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) #endif +/* Availability of gettid across glibc versions is hit-and-miss, therefore + * fallback to syscall in this macro and use it everywhere. + */ +#ifndef sys_gettid +#define sys_gettid() syscall(SYS_gettid) +#endif + + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c index 7d050364efca..62971dbf2996 100644 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -12,6 +12,7 @@ #include #include +#include "bpf_util.h" #include "test_maps.h" #include "task_local_storage_helpers.h" #include "read_bpf_task_storage_busy.skel.h" @@ -115,7 +116,7 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "attach", "error %d\n", err); /* Trigger program */ - syscall(SYS_gettid); + sys_gettid(); skel->bss->pid = 0; CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 070c52c312e5..6befa870434b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -690,7 +690,7 @@ void test_bpf_cookie(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->my_tid = syscall(SYS_gettid); + skel->bss->my_tid = sys_gettid(); if (test__start_subtest("kprobe")) kprobe_subtest(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 9006549a1294..b8e1224cfd19 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -226,7 +226,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), "pthread_create"); - skel->bss->tid = syscall(SYS_gettid); + skel->bss->tid = sys_gettid(); do_dummy_read_opts(skel->progs.dump_task, opts); @@ -255,10 +255,10 @@ static void *run_test_task_tid(void *arg) union bpf_iter_link_info linfo; int num_unknown_tid, num_known_tid; - ASSERT_NEQ(getpid(), syscall(SYS_gettid), "check_new_thread_id"); + ASSERT_NEQ(getpid(), sys_gettid(), "check_new_thread_id"); memset(&linfo, 0, sizeof(linfo)); - linfo.task.tid = syscall(SYS_gettid); + linfo.task.tid = sys_gettid(); opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); test_task_common(&opts, 0, 1); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 747761572098..9015e2c2ab12 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -63,14 +63,14 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK(err, "map_delete_elem")) goto out; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = cgrp_ls_tp_btf__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); skel->bss->target_pid = 0; @@ -154,7 +154,7 @@ static void test_recursion(int cgroup_fd) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ - syscall(SYS_gettid); + sys_gettid(); out: cgrp_ls_recursion__destroy(skel); @@ -224,7 +224,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) return; CGROUP_MODE_SET(skel); - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 26019313e1fc..1c682550e0e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -1010,7 +1010,7 @@ static void run_core_reloc_tests(bool use_btfgen) struct data *data; void *mmap_data = NULL; - my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32); + my_pid_tgid = getpid() | ((uint64_t)sys_gettid() << 32); for (i = 0; i < ARRAY_SIZE(test_cases); i++) { char btf_file[] = "/tmp/core_reloc.btf.XXXXXX"; diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c index cad664546912..fa639b021f7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -20,7 +20,7 @@ void test_linked_funcs(void) bpf_program__set_autoload(skel->progs.handler1, true); bpf_program__set_autoload(skel->progs.handler2, true); - skel->rodata->my_tid = syscall(SYS_gettid); + skel->rodata->my_tid = sys_gettid(); skel->bss->syscall_id = SYS_getpgid; err = linked_funcs__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index c29787e092d6..761ce24bce38 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -23,7 +23,7 @@ static int get_pid_tgid(pid_t *pid, pid_t *tgid, struct stat st; int err; - *pid = syscall(SYS_gettid); + *pid = sys_gettid(); *tgid = getpid(); err = stat("/proc/self/ns/pid", &st); diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index a1f7e7378a64..ebe0c12b5536 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -21,7 +21,7 @@ static void test_success(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.get_cgroup_id, true); bpf_program__set_autoload(skel->progs.task_succ, true); @@ -58,7 +58,7 @@ static void test_rcuptr_acquire(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.task_acquire, true); err = rcu_read_lock__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 00cc9d0aee5d..60f474d965a9 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -31,14 +31,14 @@ static void test_sys_enter_exit(void) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = task_local_storage__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); /* 3x syscalls: 1x attach and 2x gettid */ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); @@ -107,7 +107,7 @@ static void test_recursion(void) /* trigger sys_enter, make sure it does not cause deadlock */ skel->bss->test_pid = getpid(); - syscall(SYS_gettid); + sys_gettid(); skel->bss->test_pid = 0; task_ls_recursion__detach(skel); @@ -262,7 +262,7 @@ static void test_uptr_basic(void) __u64 ev_dummy_data = 1; int err; - my_tid = syscall(SYS_gettid); + my_tid = sys_gettid(); parent_task_fd = sys_pidfd_open(my_tid, 0); if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 2c39902b8a09..619b31cd24a1 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -125,7 +125,7 @@ static void *child_thread(void *ctx) struct child *child = ctx; int c = 0, err; - child->tid = syscall(SYS_gettid); + child->tid = sys_gettid(); /* let parent know we are ready */ err = write(child->c2p[1], &c, 1); -- cgit v1.2.3-70-g09d2 From d9d4d127e813427afb26ff7e0f0c58989501be84 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 12 Nov 2024 03:09:03 -0800 Subject: selftests/bpf: watchdog timer for test_progs This commit provides a watchdog timer that sets a limit of how long a single sub-test could run: - if sub-test runs for 10 seconds, the name of the test is printed (currently the name of the test is printed only after it finishes); - if sub-test runs for 120 seconds, the running thread is terminated with SIGSEGV (to trigger crash_handler() and get a stack trace). Specifically: - the timer is armed on each call to run_one_test(); - re-armed at each call to test__start_subtest(); - is stopped when exiting run_one_test(). Default timeout could be overridden using '-w' or '--watchdog-timeout' options. Value 0 can be used to turn the timer off. Here is an example execution: $ ./ssh-exec.sh ./test_progs -w 5 -t \ send_signal/send_signal_perf_thread_remote,send_signal/send_signal_nmi_thread_remote WATCHDOG: test case send_signal/send_signal_nmi_thread_remote executes for 5 seconds, terminating with SIGSEGV Caught signal #11! Stack trace: ./test_progs(crash_handler+0x1f)[0x9049ef] /lib64/libc.so.6(+0x40d00)[0x7f1f1184fd00] /lib64/libc.so.6(read+0x4a)[0x7f1f1191cc4a] ./test_progs[0x720dd3] ./test_progs[0x71ef7a] ./test_progs(test_send_signal+0x1db)[0x71edeb] ./test_progs[0x9066c5] ./test_progs(main+0x5ed)[0x9054ad] /lib64/libc.so.6(+0x2a088)[0x7f1f11839088] /lib64/libc.so.6(__libc_start_main+0x8b)[0x7f1f1183914b] ./test_progs(_start+0x25)[0x527385] #292 send_signal:FAIL test_send_signal_common:PASS:reading pipe 0 nsec test_send_signal_common:PASS:reading pipe error: size 0 0 nsec test_send_signal_common:PASS:incorrect result 0 nsec test_send_signal_common:PASS:pipe_write 0 nsec test_send_signal_common:PASS:setpriority 0 nsec Timer is implemented using timer_{create,start} librt API. Internally librt uses pthreads for SIGEV_THREAD timers, so this change adds a background timer thread to the test process. Because of this a few checks in tests 'bpf_iter' and 'iters' need an update to account for an extra thread. For parallelized scenario the watchdog is also created for each worker fork. If one of the workers gets stuck, it would be terminated by a watchdog. In theory, this might lead to a scenario when all worker threads are exhausted, however this should not be a problem for server_main(), as it would exit with some of the tests not run. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20241112110906.3045278-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 8 +- tools/testing/selftests/bpf/prog_tests/iters.c | 4 +- tools/testing/selftests/bpf/test_progs.c | 104 ++++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 6 ++ 4 files changed, 116 insertions(+), 6 deletions(-) (limited to 'tools/testing/selftests/bpf/prog_tests/bpf_iter.c') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index b8e1224cfd19..6f1bfacd7375 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -265,10 +265,10 @@ static void *run_test_task_tid(void *arg) linfo.task.tid = 0; linfo.task.pid = getpid(); - /* This includes the parent thread, this thread, + /* This includes the parent thread, this thread, watchdog timer thread * and the do_nothing_wait thread */ - test_task_common(&opts, 2, 1); + test_task_common(&opts, 3, 1); test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid); ASSERT_GT(num_unknown_tid, 2, "check_num_unknown_tid"); @@ -297,7 +297,7 @@ static void test_task_pid(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - test_task_common(&opts, 1, 1); + test_task_common(&opts, 2, 1); } static void test_task_pidfd(void) @@ -315,7 +315,7 @@ static void test_task_pidfd(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - test_task_common(&opts, 1, 1); + test_task_common(&opts, 2, 1); close(pidfd); } diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 89ff23c4a8bc..3cea71f9c500 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -192,8 +192,8 @@ static void subtest_task_iters(void) syscall(SYS_getpgid); iters_task__detach(skel); ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); - ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); - ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + ASSERT_EQ(skel->bss->threads_cnt, thread_num + 2, "threads_cnt"); + ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 2, "proc_threads_cnt"); ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); pthread_mutex_unlock(&do_nothing_mutex); for (int i = 0; i < thread_num; i++) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 7421874380c2..6088d8222d59 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "json_writer.h" #include "network_helpers.h" @@ -179,6 +180,88 @@ int usleep(useconds_t usec) return syscall(__NR_nanosleep, &ts, NULL); } +/* Watchdog timer is started by watchdog_start() and stopped by watchdog_stop(). + * If timer is active for longer than env.secs_till_notify, + * it prints the name of the current test to the stderr. + * If timer is active for longer than env.secs_till_kill, + * it kills the thread executing the test by sending a SIGSEGV signal to it. + */ +static void watchdog_timer_func(union sigval sigval) +{ + struct itimerspec timeout = {}; + char test_name[256]; + int err; + + if (env.subtest_state) + snprintf(test_name, sizeof(test_name), "%s/%s", + env.test->test_name, env.subtest_state->name); + else + snprintf(test_name, sizeof(test_name), "%s", + env.test->test_name); + + switch (env.watchdog_state) { + case WD_NOTIFY: + fprintf(env.stderr_saved, "WATCHDOG: test case %s executes for %d seconds...\n", + test_name, env.secs_till_notify); + timeout.it_value.tv_sec = env.secs_till_kill - env.secs_till_notify; + env.watchdog_state = WD_KILL; + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to arm watchdog timer\n"); + break; + case WD_KILL: + fprintf(env.stderr_saved, + "WATCHDOG: test case %s executes for %d seconds, terminating with SIGSEGV\n", + test_name, env.secs_till_kill); + pthread_kill(env.main_thread, SIGSEGV); + break; + } +} + +static void watchdog_start(void) +{ + struct itimerspec timeout = {}; + int err; + + if (env.secs_till_kill == 0) + return; + if (env.secs_till_notify > 0) { + env.watchdog_state = WD_NOTIFY; + timeout.it_value.tv_sec = env.secs_till_notify; + } else { + env.watchdog_state = WD_KILL; + timeout.it_value.tv_sec = env.secs_till_kill; + } + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to start watchdog timer\n"); +} + +static void watchdog_stop(void) +{ + struct itimerspec timeout = {}; + int err; + + env.watchdog_state = WD_NOTIFY; + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to stop watchdog timer\n"); +} + +static void watchdog_init(void) +{ + struct sigevent watchdog_sev = { + .sigev_notify = SIGEV_THREAD, + .sigev_notify_function = watchdog_timer_func, + }; + int err; + + env.main_thread = pthread_self(); + err = timer_create(CLOCK_MONOTONIC, &watchdog_sev, &env.watchdog); + if (err) + fprintf(stderr, "Failed to initialize watchdog timer\n"); +} + static bool should_run(struct test_selector *sel, int num, const char *name) { int i; @@ -515,6 +598,7 @@ bool test__start_subtest(const char *subtest_name) env.subtest_state = subtest_state; stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt); + watchdog_start(); return true; } @@ -780,6 +864,7 @@ enum ARG_KEYS { ARG_DEBUG = -1, ARG_JSON_SUMMARY = 'J', ARG_TRAFFIC_MONITOR = 'm', + ARG_WATCHDOG_TIMEOUT = 'w', }; static const struct argp_option opts[] = { @@ -810,6 +895,8 @@ static const struct argp_option opts[] = { { "traffic-monitor", ARG_TRAFFIC_MONITOR, "NAMES", 0, "Monitor network traffic of tests with name matching the pattern (supports '*' wildcard)." }, #endif + { "watchdog-timeout", ARG_WATCHDOG_TIMEOUT, "SECONDS", 0, + "Kill the process if tests are not making progress for specified number of seconds." }, {}, }; @@ -1035,6 +1122,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) true); break; #endif + case ARG_WATCHDOG_TIMEOUT: + env->secs_till_kill = atoi(arg); + if (env->secs_till_kill < 0) { + fprintf(stderr, "Invalid watchdog timeout: %s.\n", arg); + return -EINVAL; + } + if (env->secs_till_kill < env->secs_till_notify) { + env->secs_till_notify = 0; + } + break; default: return ARGP_ERR_UNKNOWN; } @@ -1263,10 +1360,12 @@ static void run_one_test(int test_num) stdio_hijack(&state->log_buf, &state->log_cnt); + watchdog_start(); if (test->run_test) test->run_test(); else if (test->run_serial_test) test->run_serial_test(); + watchdog_stop(); /* ensure last sub-test is finalized properly */ if (env.subtest_state) @@ -1707,6 +1806,7 @@ out: static int worker_main(int sock) { save_netns(); + watchdog_init(); while (true) { /* receive command */ @@ -1816,6 +1916,8 @@ int main(int argc, char **argv) sigaction(SIGSEGV, &sigact, NULL); + env.secs_till_notify = 10; + env.secs_till_kill = 120; err = argp_parse(&argp, argc, argv, 0, NULL, &env); if (err) return err; @@ -1824,6 +1926,8 @@ int main(int argc, char **argv) if (err) return err; + watchdog_init(); + /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 7a58895867c3..74de33ae37e5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -131,6 +131,12 @@ struct test_env { pid_t *worker_pids; /* array of worker pids */ int *worker_socks; /* array of worker socks */ int *worker_current_test; /* array of current running test for each worker */ + + pthread_t main_thread; + int secs_till_notify; + int secs_till_kill; + timer_t watchdog; /* watch for stalled tests/subtests */ + enum { WD_NOTIFY, WD_KILL } watchdog_state; }; #define MAX_LOG_TRUNK_SIZE 8192 -- cgit v1.2.3-70-g09d2