From 3f6a74bd628278e6eab4220449702a388aea7595 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2023 12:20:11 -0300 Subject: perf evsel: Free evsel->filter on the destructor Noticed with: make EXTRA_CFLAGS="-fsanitize=address" BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf-tools-next -C tools/perf install-bin Direct leak of 45 byte(s) in 1 object(s) allocated from: #0 0x7f213f87243b in strdup (/lib64/libasan.so.8+0x7243b) #1 0x63d15f in evsel__set_filter util/evsel.c:1371 #2 0x63d15f in evsel__append_filter util/evsel.c:1387 #3 0x63d15f in evsel__append_tp_filter util/evsel.c:1400 #4 0x62cd52 in evlist__append_tp_filter util/evlist.c:1145 #5 0x62cd52 in evlist__append_tp_filter_pids util/evlist.c:1196 #6 0x541e49 in trace__set_filter_loop_pids /home/acme/git/perf-tools/tools/perf/builtin-trace.c:3646 #7 0x541e49 in trace__set_filter_pids /home/acme/git/perf-tools/tools/perf/builtin-trace.c:3670 #8 0x541e49 in trace__run /home/acme/git/perf-tools/tools/perf/builtin-trace.c:3970 #9 0x541e49 in cmd_trace /home/acme/git/perf-tools/tools/perf/builtin-trace.c:5141 #10 0x5ef1a2 in run_builtin /home/acme/git/perf-tools/tools/perf/perf.c:323 #11 0x4196da in handle_internal_command /home/acme/git/perf-tools/tools/perf/perf.c:377 #12 0x4196da in run_argv /home/acme/git/perf-tools/tools/perf/perf.c:421 #13 0x4196da in main /home/acme/git/perf-tools/tools/perf/perf.c:537 #14 0x7f213e84a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f) Free it on evsel__exit(). 
Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20230719202951.534582-2-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 762e2b2634a5..e41bc4d9925f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1474,6 +1474,7 @@ void evsel__exit(struct evsel *evsel) perf_thread_map__put(evsel->core.threads); zfree(&evsel->group_name); zfree(&evsel->name); + zfree(&evsel->filter); zfree(&evsel->pmu_name); zfree(&evsel->group_pmu_name); zfree(&evsel->unit); -- cgit v1.2.3-70-g09d2 From 04cb4fc4d40a5bf1f9d116bc77e69791451f9fcc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2023 15:23:52 -0300 Subject: perf thread: Allow tools to register a thread->priv destructor So that when thread__delete() runs, the destructor is called and can free whatever tools stashed into thread->priv, as 'perf trace' does; 'perf trace' will use this new facility to plug some leaks. Added an assert(thread__priv_destructor == NULL) as suggested in Ian's review. 
Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/CAP-5=fV3Er=Ek8=iE=bSGbEBmM56_PJffMWot1g_5Bh8B5hO7A@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 13 +++++++++++++ tools/perf/util/thread.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0b166404c5c3..fe5e6991ae4b 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -80,6 +80,15 @@ err_thread: return NULL; } +static void (*thread__priv_destructor)(void *priv); + +void thread__set_priv_destructor(void (*destructor)(void *priv)) +{ + assert(thread__priv_destructor == NULL); + + thread__priv_destructor = destructor; +} + void thread__delete(struct thread *thread) { struct namespaces *namespaces, *tmp_namespaces; @@ -112,6 +121,10 @@ void thread__delete(struct thread *thread) exit_rwsem(thread__namespaces_lock(thread)); exit_rwsem(thread__comm_lock(thread)); thread__free_stitch_list(thread); + + if (thread__priv_destructor) + thread__priv_destructor(thread__priv(thread)); + RC_CHK_FREE(thread); } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 9068a21ce0fa..e79225a0ea46 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -71,6 +71,8 @@ struct thread *thread__new(pid_t pid, pid_t tid); int thread__init_maps(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); +void thread__set_priv_destructor(void (*destructor)(void *priv)); + struct thread *thread__get(struct thread *thread); void thread__put(struct thread *thread); -- cgit v1.2.3-70-g09d2 From 9de251cb501f834aeb13e87598d1f78588964101 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2023 15:26:23 -0300 Subject: perf trace: Register a thread priv destructor To plug these leaks detected with: $ make EXTRA_CFLAGS="-fsanitize=address" BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf-tools-next 
-C tools/perf install-bin ================================================================= ==473890==ERROR: LeakSanitizer: detected memory leaks Direct leak of 112 byte(s) in 1 object(s) allocated from: #0 0x7fdf19aba097 in calloc (/lib64/libasan.so.8+0xba097) #1 0x987836 in zalloc (/home/acme/bin/perf+0x987836) #2 0x5367ae in thread_trace__new /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:1289 #3 0x5367ae in thread__trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:1307 #4 0x5367ae in trace__sys_exit /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:2468 #5 0x52bf34 in trace__handle_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3177 #6 0x52bf34 in __trace__deliver_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3685 #7 0x542927 in trace__deliver_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3712 #8 0x542927 in trace__run /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:4055 #9 0x542927 in cmd_trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:5141 #10 0x5ef1a2 in run_builtin /home/acme/git/perf-tools-next/tools/perf/perf.c:323 #11 0x4196da in handle_internal_command /home/acme/git/perf-tools-next/tools/perf/perf.c:377 #12 0x4196da in run_argv /home/acme/git/perf-tools-next/tools/perf/perf.c:421 #13 0x4196da in main /home/acme/git/perf-tools-next/tools/perf/perf.c:537 #14 0x7fdf18a4a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f) Direct leak of 2048 byte(s) in 1 object(s) allocated from: #0 0x7f788fcba6af in __interceptor_malloc (/lib64/libasan.so.8+0xba6af) #1 0x5337c0 in trace__sys_enter /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:2342 #2 0x52bfb4 in trace__handle_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3191 #3 0x52bfb4 in __trace__deliver_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3699 #4 0x542883 in trace__deliver_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3726 #5 
0x542883 in trace__run /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:4069 #6 0x542883 in cmd_trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:5155 #7 0x5ef232 in run_builtin /home/acme/git/perf-tools-next/tools/perf/perf.c:323 #8 0x4196da in handle_internal_command /home/acme/git/perf-tools-next/tools/perf/perf.c:377 #9 0x4196da in run_argv /home/acme/git/perf-tools-next/tools/perf/perf.c:421 #10 0x4196da in main /home/acme/git/perf-tools-next/tools/perf/perf.c:537 #11 0x7f788ec4a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f) Indirect leak of 48 byte(s) in 1 object(s) allocated from: #0 0x7fdf19aba6af in __interceptor_malloc (/lib64/libasan.so.8+0xba6af) #1 0x77b335 in intlist__new util/intlist.c:116 #2 0x5367fd in thread_trace__new /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:1293 #3 0x5367fd in thread__trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:1307 #4 0x5367fd in trace__sys_exit /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:2468 #5 0x52bf34 in trace__handle_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3177 #6 0x52bf34 in __trace__deliver_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3685 #7 0x542927 in trace__deliver_event /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3712 #8 0x542927 in trace__run /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:4055 #9 0x542927 in cmd_trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:5141 #10 0x5ef1a2 in run_builtin /home/acme/git/perf-tools-next/tools/perf/perf.c:323 #11 0x4196da in handle_internal_command /home/acme/git/perf-tools-next/tools/perf/perf.c:377 #12 0x4196da in run_argv /home/acme/git/perf-tools-next/tools/perf/perf.c:421 #13 0x4196da in main /home/acme/git/perf-tools-next/tools/perf/perf.c:537 #14 0x7fdf18a4a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f) Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: 
https://lore.kernel.org/lkml/20230719202951.534582-4-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6e73d0e95715..b7cbe4bcd136 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1296,6 +1296,19 @@ static struct thread_trace *thread_trace__new(void) return ttrace; } +static void thread_trace__delete(void *pttrace) +{ + struct thread_trace *ttrace = pttrace; + + if (!ttrace) + return; + + intlist__delete(ttrace->syscall_stats); + ttrace->syscall_stats = NULL; + zfree(&ttrace->entry_str); + free(ttrace); +} + static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) { struct thread_trace *ttrace; @@ -1635,6 +1648,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist) if (trace->host == NULL) return -ENOMEM; + thread__set_priv_destructor(thread_trace__delete); + err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); if (err < 0) goto out; -- cgit v1.2.3-70-g09d2 From 7962ef13651a9163f07b530607392ea123482e8a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2023 15:37:14 -0300 Subject: perf trace: Really free the evsel->priv area In 3cb4d5e00e037c70 ("perf trace: Free syscall tp fields in evsel->priv") evsel->priv was only being freed if strcmp(evsel->tp_format->system, "syscalls") returned zero, while the corresponding initialization of evsel->priv was being performed if it was _not_ zero, i.e. if the tp system wasn't 'syscalls'. Just stop looking for that and free it if evsel->priv was set, which should be equivalent. Also use the pre-existing evsel_trace__delete() function. 
This resolves these leaks, detected with: $ make EXTRA_CFLAGS="-fsanitize=address" BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf-tools-next -C tools/perf install-bin ================================================================= ==481565==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f7343cba097 in calloc (/lib64/libasan.so.8+0xba097) #1 0x987966 in zalloc (/home/acme/bin/perf+0x987966) #2 0x52f9b9 in evsel_trace__new /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:307 #3 0x52f9b9 in evsel__syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:333 #4 0x52f9b9 in evsel__init_raw_syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:458 #5 0x52f9b9 in perf_evsel__raw_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:480 #6 0x540e8b in trace__add_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3212 #7 0x540e8b in trace__run /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3891 #8 0x540e8b in cmd_trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:5156 #9 0x5ef262 in run_builtin /home/acme/git/perf-tools-next/tools/perf/perf.c:323 #10 0x4196da in handle_internal_command /home/acme/git/perf-tools-next/tools/perf/perf.c:377 #11 0x4196da in run_argv /home/acme/git/perf-tools-next/tools/perf/perf.c:421 #12 0x4196da in main /home/acme/git/perf-tools-next/tools/perf/perf.c:537 #13 0x7f7342c4a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f) Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f7343cba097 in calloc (/lib64/libasan.so.8+0xba097) #1 0x987966 in zalloc (/home/acme/bin/perf+0x987966) #2 0x52f9b9 in evsel_trace__new /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:307 #3 0x52f9b9 in evsel__syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:333 #4 0x52f9b9 in evsel__init_raw_syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:458 #5 
0x52f9b9 in perf_evsel__raw_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:480 #6 0x540dd1 in trace__add_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3205 #7 0x540dd1 in trace__run /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3891 #8 0x540dd1 in cmd_trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:5156 #9 0x5ef262 in run_builtin /home/acme/git/perf-tools-next/tools/perf/perf.c:323 #10 0x4196da in handle_internal_command /home/acme/git/perf-tools-next/tools/perf/perf.c:377 #11 0x4196da in run_argv /home/acme/git/perf-tools-next/tools/perf/perf.c:421 #12 0x4196da in main /home/acme/git/perf-tools-next/tools/perf/perf.c:537 #13 0x7f7342c4a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f) SUMMARY: AddressSanitizer: 80 byte(s) leaked in 2 allocation(s). [root@quaco ~]# With this we plug all leaks with "perf trace sleep 1". Fixes: 3cb4d5e00e037c70 ("perf trace: Free syscall tp fields in evsel->priv") Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Link: https://lore.kernel.org/lkml/20230719202951.534582-5-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b7cbe4bcd136..56651d666480 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3151,13 +3151,8 @@ static void evlist__free_syscall_tp_fields(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - struct evsel_trace *et = evsel->priv; - - if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls")) - continue; - - zfree(&et->fmt); - free(et); + evsel_trace__delete(evsel->priv); + evsel->priv = NULL; } } -- cgit v1.2.3-70-g09d2 From fcca1faf11b47011770c361a1dfc36ed83905148 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2023 
16:49:02 -0300 Subject: perf trace: Free thread_trace->files table The fd->pathname table that is kept in 'struct thread_trace' and thus in thread->priv must be freed when a thread is deleted. This was also detected using -fsanitize=address. Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20230719202951.534582-6-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 56651d666480..7ece2521efb6 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1296,6 +1296,8 @@ static struct thread_trace *thread_trace__new(void) return ttrace; } +static void thread_trace__free_files(struct thread_trace *ttrace); + static void thread_trace__delete(void *pttrace) { struct thread_trace *ttrace = pttrace; @@ -1305,6 +1307,7 @@ static void thread_trace__delete(void *pttrace) intlist__delete(ttrace->syscall_stats); ttrace->syscall_stats = NULL; + thread_trace__free_files(ttrace); zfree(&ttrace->entry_str); free(ttrace); } @@ -1346,6 +1349,17 @@ void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, static const size_t trace__entry_str_size = 2048; +static void thread_trace__free_files(struct thread_trace *ttrace) +{ + for (int i = 0; i < ttrace->files.max; ++i) { + struct file *file = ttrace->files.table + i; + zfree(&file->pathname); + } + + zfree(&ttrace->files.table); + ttrace->files.max = -1; +} + static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) { if (fd < 0) -- cgit v1.2.3-70-g09d2 From faa4e0da1cbab97aa18bb6562ab32d9f0fc32e82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2023 16:53:01 -0300 Subject: MAINTAINERS: Add git information for perf-tools and perf-tools-next trees/branches Now the perf tools development is done on these trees/branches: 
git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools.git perf-tools git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git perf-tools-next For a while I'll continue mirroring what is in these to the same branches in my git tree. Suggested-by: John Garry Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/CAP-5=fVGOP6-k=BTRd_bn=N0HVy+1ShpdW5rk5ND0ZGhm_fQkg@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index aee340630eca..e351cfc7cd41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16629,6 +16629,8 @@ L: linux-kernel@vger.kernel.org S: Supported W: https://perf.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core +T: git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools.git perf-tools +T: git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git perf-tools-next F: arch/*/events/* F: arch/*/events/*/* F: arch/*/include/asm/perf_event.h -- cgit v1.2.3-70-g09d2 From 2df270716447a1024a6c955eed8fa579333dca85 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Jun 2023 15:18:27 -0300 Subject: perf bench uprobe: Add benchmark to test uprobe overhead This just adds the initial "workload", a call to libc's usleep(1000us) function: $ perf stat --null perf bench uprobe all # Running uprobe/baseline benchmark... # Executed 1000 usleep(1000) calls Total time: 1053533 usecs 1053.533 usecs/op Performance counter stats for 'perf bench uprobe all': 1.061042896 seconds time elapsed 0.001079000 seconds user 0.006499000 seconds sys $ More entries will be added using a BPF skel to add various uprobes to the usleep() function. 
Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andre Fredette Cc: Clark Williams Cc: Dave Tucker Cc: Derek Barbosa Cc: Jiri Olsa Cc: Masami Hiramatsu (Google) Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20230719204910.539044-2-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 3 ++ tools/perf/bench/Build | 1 + tools/perf/bench/bench.h | 1 + tools/perf/bench/uprobe.c | 80 +++++++++++++++++++++++++++++++++ tools/perf/builtin-bench.c | 6 +++ 5 files changed, 91 insertions(+) create mode 100644 tools/perf/bench/uprobe.c diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index f04f0eaded98..ca5789625cd2 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -67,6 +67,9 @@ SUBSYSTEM 'internals':: Benchmark internal perf functionality. +'uprobe':: + Benchmark overhead of uprobe + BPF. + 'all':: All benchmark subsystems. diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 0f158dc8139b..47412d47dccf 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -16,6 +16,7 @@ perf-y += inject-buildid.o perf-y += evlist-open-close.o perf-y += breakpoint.o perf-y += pmu-scan.o +perf-y += uprobe.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 0d2b65976212..201311f75c96 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -42,6 +42,7 @@ int bench_inject_build_id(int argc, const char **argv); int bench_evlist_open_close(int argc, const char **argv); int bench_breakpoint_thread(int argc, const char **argv); int bench_breakpoint_enable(int argc, const char **argv); +int bench_uprobe_baseline(int argc, const char **argv); int bench_pmu_scan(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c 
new file mode 100644 index 000000000000..707174220a76 --- /dev/null +++ b/tools/perf/bench/uprobe.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* + * uprobe.c + * + * uprobe benchmarks + * + * Copyright (C) 2023, Red Hat Inc, Arnaldo Carvalho de Melo + */ +#include "../perf.h" +#include "../util/util.h" +#include +#include "../builtin.h" +#include "bench.h" +#include + +#include +#include +#include +#include +#include +#include +#include + +#define LOOPS_DEFAULT 1000 +static int loops = LOOPS_DEFAULT; + +static const struct option options[] = { + OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_uprobe_usage[] = { + "perf bench uprobe ", + NULL +}; + +static int bench_uprobe(int argc, const char **argv) +{ + const char *name = "usleep(1000)", *unit = "usec"; + struct timespec start, end; + u64 diff; + int i; + + argc = parse_options(argc, argv, options, bench_uprobe_usage, 0); + + clock_gettime(CLOCK_REALTIME, &start); + + for (i = 0; i < loops; i++) { + usleep(USEC_PER_MSEC); + } + + clock_gettime(CLOCK_REALTIME, &end); + + diff = end.tv_sec * NSEC_PER_SEC + end.tv_nsec - (start.tv_sec * NSEC_PER_SEC + start.tv_nsec); + diff /= NSEC_PER_USEC; + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# Executed %'d %s calls\n", loops, name); + printf(" %14s: %'" PRIu64 " %ss\n\n", "Total time", diff, unit); + printf(" %'.3f %ss/op\n", (double)diff / (double)loops, unit); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%" PRIu64 "\n", diff); + break; + + default: + /* reaching here is something of a disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + } + + return 0; +} + +int bench_uprobe_baseline(int argc, const char **argv) +{ + return bench_uprobe(argc, argv); +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index db435b791a09..09637aee8341 100644 --- a/tools/perf/builtin-bench.c +++ 
b/tools/perf/builtin-bench.c @@ -104,6 +104,11 @@ static struct bench breakpoint_benchmarks[] = { { NULL, NULL, NULL }, }; +static struct bench uprobe_benchmarks[] = { + { "baseline", "Baseline libc usleep(1000) call", bench_uprobe_baseline, }, + { NULL, NULL, NULL }, +}; + struct collection { const char *name; const char *summary; @@ -123,6 +128,7 @@ static struct collection collections[] = { #endif { "internals", "Perf-internals benchmarks", internals_benchmarks }, { "breakpoint", "Breakpoint benchmarks", breakpoint_benchmarks }, + { "uprobe", "uprobe benchmarks", uprobe_benchmarks }, { "all", "All benchmarks", NULL }, { NULL, NULL, NULL } }; -- cgit v1.2.3-70-g09d2 From dded6f615b854740461be63672fa05158875ffaa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Jul 2023 08:45:12 -0300 Subject: perf bench uprobe: Print diff to baseline This is just prep work to show the diff to the unmodified workload. Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andre Fredette Cc: Clark Williams Cc: Dave Tucker Cc: Derek Barbosa Cc: Jiri Olsa Cc: Masami Hiramatsu (Google) Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20230719204910.539044-3-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/uprobe.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c index 707174220a76..60e7c43298d8 100644 --- a/tools/perf/bench/uprobe.c +++ b/tools/perf/bench/uprobe.c @@ -34,6 +34,29 @@ static const char * const bench_uprobe_usage[] = { NULL }; +static int bench_uprobe_format__default_fprintf(const char *name, const char *unit, u64 diff, FILE *fp) +{ + static u64 baseline; + s64 diff_to_baseline = diff - baseline; + int printed = fprintf(fp, "# Executed %'d %s calls\n", loops, name); + + printed += fprintf(fp, " %14s: %'" PRIu64 " %ss", "Total time", diff, unit); + + if (baseline) + printed += fprintf(fp, " %s%'" PRId64 " to baseline", diff_to_baseline 
> 0 ? "+" : "", diff_to_baseline); + + printed += fprintf(fp, "\n\n %'.3f %ss/op", (double)diff / (double)loops, unit); + + if (baseline) + printed += fprintf(fp, " %'.3f %ss/op to baseline", (double)diff_to_baseline / (double)loops, unit); + else + baseline = diff; + + fputc('\n', fp); + + return printed + 1; +} + static int bench_uprobe(int argc, const char **argv) { const char *name = "usleep(1000)", *unit = "usec"; @@ -56,9 +79,7 @@ static int bench_uprobe(int argc, const char **argv) switch (bench_format) { case BENCH_FORMAT_DEFAULT: - printf("# Executed %'d %s calls\n", loops, name); - printf(" %14s: %'" PRIu64 " %ss\n\n", "Total time", diff, unit); - printf(" %'.3f %ss/op\n", (double)diff / (double)loops, unit); + bench_uprobe_format__default_fprintf(name, unit, diff, stdout); break; case BENCH_FORMAT_SIMPLE: -- cgit v1.2.3-70-g09d2 From 54d811023b5f99e658511b577b16a6d7014d162c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Jul 2023 08:49:40 -0300 Subject: perf bench uprobe: Show diff to previous Will be useful to show the incremental overhead as we do more stuff in the BPF program attached to the uprobes. 
Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andre Fredette Cc: Clark Williams Cc: Dave Tucker Cc: Derek Barbosa Cc: Jiri Olsa Cc: Masami Hiramatsu (Google) Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20230719204910.539044-4-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/uprobe.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c index 60e7c43298d8..a90e09f791c5 100644 --- a/tools/perf/bench/uprobe.c +++ b/tools/perf/bench/uprobe.c @@ -36,24 +36,35 @@ static const char * const bench_uprobe_usage[] = { static int bench_uprobe_format__default_fprintf(const char *name, const char *unit, u64 diff, FILE *fp) { - static u64 baseline; - s64 diff_to_baseline = diff - baseline; + static u64 baseline, previous; + s64 diff_to_baseline = diff - baseline, + diff_to_previous = diff - previous; int printed = fprintf(fp, "# Executed %'d %s calls\n", loops, name); printed += fprintf(fp, " %14s: %'" PRIu64 " %ss", "Total time", diff, unit); - if (baseline) + if (baseline) { printed += fprintf(fp, " %s%'" PRId64 " to baseline", diff_to_baseline > 0 ? "+" : "", diff_to_baseline); + if (previous != baseline) + fprintf(stdout, " %s%'" PRId64 " to previous", diff_to_previous > 0 ? 
"+" : "", diff_to_previous); + } + printed += fprintf(fp, "\n\n %'.3f %ss/op", (double)diff / (double)loops, unit); - if (baseline) + if (baseline) { printed += fprintf(fp, " %'.3f %ss/op to baseline", (double)diff_to_baseline / (double)loops, unit); - else + + if (previous != baseline) + printed += fprintf(fp, " %'.3f %ss/op to previous", (double)diff_to_previous / (double)loops, unit); + } else { baseline = diff; + } fputc('\n', fp); + previous = diff; + return printed + 1; } -- cgit v1.2.3-70-g09d2 From 6af5e4cf3a6521d23ca53df5001319babefdffbf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Jun 2023 17:42:47 -0300 Subject: perf bench uprobe empty: Add entry attaching an empty BPF program Using libbpf and a BPF skel: # perf bench uprobe all # Running uprobe/baseline benchmark... # Executed 1,000 usleep(1000) calls Total time: 1,055,618 usecs 1,055.618 usecs/op # Running uprobe/empty benchmark... # Executed 1,000 usleep(1000) calls Total time: 1,057,146 usecs +1,528 to baseline 1,057.146 usecs/op # Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andre Fredette Cc: Clark Williams Cc: Dave Tucker Cc: Derek Barbosa Cc: Jiri Olsa Cc: Masami Hiramatsu (Google) Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20230719204910.539044-5-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 1 + tools/perf/bench/bench.h | 1 + tools/perf/bench/uprobe.c | 75 +++++++++++++++++++++++++++-- tools/perf/builtin-bench.c | 3 +- tools/perf/util/bpf_skel/bench_uprobe.bpf.c | 12 +++++ 5 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 tools/perf/util/bpf_skel/bench_uprobe.bpf.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 097316ef38e6..a44d16ec11ee 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1057,6 +1057,7 @@ SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h 
SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h +SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): $(Q)$(MKDIR) -p $@ diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 201311f75c96..daf4850b441c 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -43,6 +43,7 @@ int bench_evlist_open_close(int argc, const char **argv); int bench_breakpoint_thread(int argc, const char **argv); int bench_breakpoint_enable(int argc, const char **argv); int bench_uprobe_baseline(int argc, const char **argv); +int bench_uprobe_empty(int argc, const char **argv); int bench_pmu_scan(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c index a90e09f791c5..dfb90038a4f7 100644 --- a/tools/perf/bench/uprobe.c +++ b/tools/perf/bench/uprobe.c @@ -24,6 +24,11 @@ #define LOOPS_DEFAULT 1000 static int loops = LOOPS_DEFAULT; +enum bench_uprobe { + BENCH_UPROBE__BASELINE, + BENCH_UPROBE__EMPTY, +}; + static const struct option options[] = { OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), OPT_END() @@ -34,6 +39,59 @@ static const char * const bench_uprobe_usage[] = { NULL }; +#ifdef HAVE_BPF_SKEL +#include "bpf_skel/bench_uprobe.skel.h" + +struct bench_uprobe_bpf *skel; + +static int bench_uprobe__setup_bpf_skel(void) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + int err; + + /* Load and verify BPF application */ + skel = bench_uprobe_bpf__open(); + if (!skel) { + fprintf(stderr, "Failed to open and load uprobes bench BPF skeleton\n"); + return -1; + } + + err = bench_uprobe_bpf__load(skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto cleanup; + } + + uprobe_opts.func_name = "usleep"; + skel->links.empty = 
bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.empty, + /*pid=*/-1, + /*binary_path=*/"/lib64/libc.so.6", + /*func_offset=*/0, + /*opts=*/&uprobe_opts); + if (!skel->links.empty) { + err = -errno; + fprintf(stderr, "Failed to attach bench uprobe: %s\n", strerror(errno)); + goto cleanup; + } + + return err; +cleanup: + bench_uprobe_bpf__destroy(skel); + return err; +} + +static void bench_uprobe__teardown_bpf_skel(void) +{ + if (skel) { + bench_uprobe_bpf__destroy(skel); + skel = NULL; + } +} +#else +static int bench_uprobe__setup_bpf_skel(void) { return 0; } +static void bench_uprobe__teardown_bpf_skel(void) {}; +#endif + static int bench_uprobe_format__default_fprintf(const char *name, const char *unit, u64 diff, FILE *fp) { static u64 baseline, previous; @@ -68,7 +126,7 @@ static int bench_uprobe_format__default_fprintf(const char *name, const char *un return printed + 1; } -static int bench_uprobe(int argc, const char **argv) +static int bench_uprobe(int argc, const char **argv, enum bench_uprobe bench) { const char *name = "usleep(1000)", *unit = "usec"; struct timespec start, end; @@ -77,7 +135,10 @@ static int bench_uprobe(int argc, const char **argv) argc = parse_options(argc, argv, options, bench_uprobe_usage, 0); - clock_gettime(CLOCK_REALTIME, &start); + if (bench != BENCH_UPROBE__BASELINE && bench_uprobe__setup_bpf_skel() < 0) + return 0; + + clock_gettime(CLOCK_REALTIME, &start); for (i = 0; i < loops; i++) { usleep(USEC_PER_MSEC); @@ -103,10 +164,18 @@ static int bench_uprobe(int argc, const char **argv) exit(1); } + if (bench != BENCH_UPROBE__BASELINE) + bench_uprobe__teardown_bpf_skel(); + return 0; } int bench_uprobe_baseline(int argc, const char **argv) { - return bench_uprobe(argc, argv); + return bench_uprobe(argc, argv, BENCH_UPROBE__BASELINE); +} + +int bench_uprobe_empty(int argc, const char **argv) +{ + return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY); } diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 
09637aee8341..1021680bbc6d 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -105,7 +105,8 @@ static struct bench breakpoint_benchmarks[] = { }; static struct bench uprobe_benchmarks[] = { - { "baseline", "Baseline libc usleep(1000) call", bench_uprobe_baseline, }, + { "baseline", "Baseline libc usleep(1000) call", bench_uprobe_baseline, }, + { "empty", "Attach empty BPF prog to uprobe on usleep, system wide", bench_uprobe_empty, }, { NULL, NULL, NULL }, }; diff --git a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c new file mode 100644 index 000000000000..1365dcc5dddf --- /dev/null +++ b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2023 Red Hat +#include "vmlinux.h" +#include + +SEC("uprobe") +int BPF_UPROBE(empty) +{ + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; -- cgit v1.2.3-70-g09d2 From 7b47623b8cae8149688c11396bb690bed6936f70 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Jun 2023 17:42:47 -0300 Subject: perf bench uprobe trace_printk: Add entry attaching a BPF program that does a trace_printk [root@five ~]# perf bench uprobe all # Running uprobe/baseline benchmark... # Executed 1,000 usleep(1000) calls Total time: 1,053,963 usecs 1,053.963 usecs/op # Running uprobe/empty benchmark... # Executed 1,000 usleep(1000) calls Total time: 1,056,293 usecs +2,330 to baseline 1,056.293 usecs/op 2.330 usecs/op to baseline # Running uprobe/trace_printk benchmark... 
# Executed 1,000 usleep(1000) calls Total time: 1,056,977 usecs +3,014 to baseline +684 to previous 1,056.977 usecs/op 3.014 usecs/op to baseline 0.684 usecs/op to previous [root@five ~]# Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andre Fredette Cc: Clark Williams Cc: Dave Tucker Cc: Derek Barbosa Cc: Jiri Olsa Cc: Masami Hiramatsu (Google) Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20230719204910.539044-6-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/bench.h | 1 + tools/perf/bench/uprobe.c | 39 +++++++++++++++++++++-------- tools/perf/builtin-bench.c | 1 + tools/perf/util/bpf_skel/bench_uprobe.bpf.c | 11 ++++++++ 4 files changed, 41 insertions(+), 11 deletions(-) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index daf4850b441c..50de4773651f 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -44,6 +44,7 @@ int bench_breakpoint_thread(int argc, const char **argv); int bench_breakpoint_enable(int argc, const char **argv); int bench_uprobe_baseline(int argc, const char **argv); int bench_uprobe_empty(int argc, const char **argv); +int bench_uprobe_trace_printk(int argc, const char **argv); int bench_pmu_scan(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c index dfb90038a4f7..914c0817fe8a 100644 --- a/tools/perf/bench/uprobe.c +++ b/tools/perf/bench/uprobe.c @@ -11,6 +11,7 @@ #include #include "../builtin.h" #include "bench.h" +#include #include #include @@ -27,6 +28,7 @@ static int loops = LOOPS_DEFAULT; enum bench_uprobe { BENCH_UPROBE__BASELINE, BENCH_UPROBE__EMPTY, + BENCH_UPROBE__TRACE_PRINTK, }; static const struct option options[] = { @@ -42,9 +44,21 @@ static const char * const bench_uprobe_usage[] = { #ifdef HAVE_BPF_SKEL #include "bpf_skel/bench_uprobe.skel.h" +#define bench_uprobe__attach_uprobe(prog) \ + skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \ + 
/*pid=*/-1, \ + /*binary_path=*/"/lib64/libc.so.6", \ + /*func_offset=*/0, \ + /*opts=*/&uprobe_opts); \ + if (!skel->links.prog) { \ + err = -errno; \ + fprintf(stderr, "Failed to attach bench uprobe \"%s\": %s\n", #prog, strerror(errno)); \ + goto cleanup; \ + } + struct bench_uprobe_bpf *skel; -static int bench_uprobe__setup_bpf_skel(void) +static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); int err; @@ -63,14 +77,12 @@ static int bench_uprobe__setup_bpf_skel(void) } uprobe_opts.func_name = "usleep"; - skel->links.empty = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.empty, - /*pid=*/-1, - /*binary_path=*/"/lib64/libc.so.6", - /*func_offset=*/0, - /*opts=*/&uprobe_opts); - if (!skel->links.empty) { - err = -errno; - fprintf(stderr, "Failed to attach bench uprobe: %s\n", strerror(errno)); + switch (bench) { + case BENCH_UPROBE__BASELINE: break; + case BENCH_UPROBE__EMPTY: bench_uprobe__attach_uprobe(empty); break; + case BENCH_UPROBE__TRACE_PRINTK: bench_uprobe__attach_uprobe(trace_printk); break; + default: + fprintf(stderr, "Invalid bench: %d\n", bench); goto cleanup; } @@ -88,7 +100,7 @@ static void bench_uprobe__teardown_bpf_skel(void) } } #else -static int bench_uprobe__setup_bpf_skel(void) { return 0; } +static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench __maybe_unused) { return 0; } static void bench_uprobe__teardown_bpf_skel(void) {}; #endif @@ -135,7 +147,7 @@ static int bench_uprobe(int argc, const char **argv, enum bench_uprobe bench) argc = parse_options(argc, argv, options, bench_uprobe_usage, 0); - if (bench != BENCH_UPROBE__BASELINE && bench_uprobe__setup_bpf_skel() < 0) + if (bench != BENCH_UPROBE__BASELINE && bench_uprobe__setup_bpf_skel(bench) < 0) return 0; clock_gettime(CLOCK_REALTIME, &start); @@ -179,3 +191,8 @@ int bench_uprobe_empty(int argc, const char **argv) { return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY); } + +int bench_uprobe_trace_printk(int 
argc, const char **argv) +{ + return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK); +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 1021680bbc6d..f60ccafccac2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -107,6 +107,7 @@ static struct bench breakpoint_benchmarks[] = { static struct bench uprobe_benchmarks[] = { { "baseline", "Baseline libc usleep(1000) call", bench_uprobe_baseline, }, { "empty", "Attach empty BPF prog to uprobe on usleep, system wide", bench_uprobe_empty, }, + { "trace_printk", "Attach trace_printk BPF prog to uprobe on usleep syswide", bench_uprobe_trace_printk, }, { NULL, NULL, NULL }, }; diff --git a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c index 1365dcc5dddf..2c55896bb33c 100644 --- a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c +++ b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c @@ -3,10 +3,21 @@ #include "vmlinux.h" #include +unsigned int nr_uprobes; + SEC("uprobe") int BPF_UPROBE(empty) { return 0; } +SEC("uprobe") +int BPF_UPROBE(trace_printk) +{ + char fmt[] = "perf bench uprobe %u"; + + bpf_trace_printk(fmt, sizeof(fmt), ++nr_uprobes); + return 0; +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; -- cgit v1.2.3-70-g09d2 From 681f34d52b9647db68cebc5f957ddfff01fb6ba0 Mon Sep 17 00:00:00 2001 From: Lu Hongfei Date: Thu, 6 Jul 2023 17:46:34 +0800 Subject: perf diff: Replaces some ',' as separator with the more usual ';' When wrapping code, using ';' as the statement separator is better than using ',', as it is more in line with the coding habits of most engineers. 
Signed-off-by: Lu Hongfei Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: opensource.kernel@vivo.com Link: https://lore.kernel.org/r/20230706094635.1553-1-luhongfei@vivo.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index e8a1b16aa5f8..57d300d8e570 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1915,8 +1915,8 @@ static int data_init(int argc, const char **argv) struct perf_data *data = &d->data; data->path = use_default ? defaults[i] : argv[i]; - data->mode = PERF_DATA_MODE_READ, - data->force = force, + data->mode = PERF_DATA_MODE_READ; + data->force = force; d->idx = i; } -- cgit v1.2.3-70-g09d2 From 91f88a0ac8bce5f385ef8c1a6766fce04f7f0043 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Jul 2023 13:12:44 -0700 Subject: perf stat: Avoid uninitialized use of perf_stat_config perf_event__read_stat_config will assign values based on number of tags and tag values. Initialize the structs to zero before they are assigned so that no uninitialized values can be seen. This potential error was reported by GCC with LTO enabled. 
Reviewed-by: Nick Desaulniers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Carsten Haitzler Cc: Fangrui Song Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Tom Rix Cc: Xing Zhengjun Cc: Yang Jihong Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230724201247.748146-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/stat.c | 2 +- tools/perf/util/stat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index 500974040fe3..706780fb5695 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -27,7 +27,7 @@ static int process_stat_config_event(struct perf_tool *tool __maybe_unused, struct machine *machine __maybe_unused) { struct perf_record_stat_config *config = &event->stat_config; - struct perf_stat_config stat_config; + struct perf_stat_config stat_config = {}; #define HAS(term, val) \ has_term(config, PERF_STAT_CONFIG_TERM__##term, val) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 967e583392c7..ec3506042217 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -729,7 +729,7 @@ size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp) size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) { - struct perf_stat_config sc; + struct perf_stat_config sc = {}; size_t ret; perf_event__read_stat_config(&sc, &event->stat_config); -- cgit v1.2.3-70-g09d2 From 0f97a3a0deccece93797cd35ba1c18704e94b7e7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Jul 2023 13:12:45 -0700 Subject: perf parse-events: Avoid use uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With GCC LTO a potential use uninitialized is spotted: ``` In function ‘parse_events_config_bpf’, 
inlined from ‘parse_events_load_bpf’ at util/parse-events.c:874:8: util/parse-events.c:792:37: error: ‘error_pos’ may be used uninitialized [-Werror=maybe-uninitialized] 792 | idx = term->err_term + error_pos; | ^ util/parse-events.c: In function ‘parse_events_load_bpf’: util/parse-events.c:765:13: note: ‘error_pos’ was declared here 765 | int error_pos; | ^ ``` So initialize at declaration. Reviewed-by: Nick Desaulniers Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Carsten Haitzler Cc: Fangrui Song Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Tom Rix Cc: Xing Zhengjun Cc: Yang Jihong Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230724201247.748146-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index acde097e327c..da29061ecf49 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -762,7 +762,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, struct list_head *head_config) { struct parse_events_term *term; - int error_pos; + int error_pos = 0; if (!head_config || list_empty(head_config)) return 0; -- cgit v1.2.3-70-g09d2 From 5cfb0cc0d95af8bf33a8fb1cedc3e76ca3b6fb81 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Jul 2023 13:12:46 -0700 Subject: perf test: Avoid weak symbol for arch_tests GCC LTO will complain that the array length varies for the arch_tests weak symbol. Use extern/static and architecture determining #if to workaround this problem. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Carsten Haitzler Cc: Fangrui Song Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Tom Rix Cc: Xing Zhengjun Cc: Yang Jihong Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230724201247.748146-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 1f6557ce3b0a..6accb5442a73 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -33,9 +33,18 @@ static bool dont_fork; const char *dso_to_test; -struct test_suite *__weak arch_tests[] = { +/* + * List of architecture specific tests. Not a weak symbol as the array length is + * dependent on the initialization, as such GCC with LTO complains of + * conflicting definitions with a weak symbol. + */ +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) +extern struct test_suite *arch_tests[]; +#else +static struct test_suite *arch_tests[] = { NULL, }; +#endif static struct test_suite *generic_tests[] = { &suite__vmlinux_matches_kallsyms, -- cgit v1.2.3-70-g09d2 From c126ac4a2003fff398311739514f173944a5ceab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 24 Jul 2023 13:12:47 -0700 Subject: perf build: Add LTO build option Add an LTO build option, that sets the appropriate CFLAGS and CXXFLAGS values. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Carsten Haitzler Cc: Fangrui Song Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Tom Rix Cc: Xing Zhengjun Cc: Yang Jihong Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230724201247.748146-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c5db0de49868..a9cfe83638a9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -256,6 +256,11 @@ ifdef PARSER_DEBUG $(call detected_var,PARSER_DEBUG_FLEX) endif +ifdef LTO + CORE_CFLAGS += -flto + CXXFLAGS += -flto +endif + # Try different combinations to accommodate systems that only have # python[2][3]-config in weird combinations in the following order of # priority from lowest to highest: -- cgit v1.2.3-70-g09d2 From 84efbdb7fb8e0844a3f9c67a6bdcc89db1012e1c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:18 -0700 Subject: perf parse-events: Remove unused PE_PMU_EVENT_FAKE token Removed by commit 70c90e4a6b2f ("perf parse-events: Avoid scanning PMUs before parsing"). 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 42 ++---------------------------------------- 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 9f28d4b5502f..64755f9cd600 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -63,7 +63,7 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_LEGACY_CACHE %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%token PE_KERNEL_PMU_EVENT %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %token PE_TERM_HW @@ -81,7 +81,7 @@ static void free_list_evsel(struct list_head* list_evsel) %type PE_MODIFIER_EVENT %type PE_MODIFIER_BP %type PE_EVENT_NAME -%type PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%type PE_KERNEL_PMU_EVENT %type PE_DRV_CFG_TERM %type name_or_raw name_or_legacy %destructor { free ($$); } @@ -394,44 +394,6 @@ PE_KERNEL_PMU_EVENT opt_pmu_config YYABORT; $$ = list; } -| -PE_PMU_EVENT_FAKE sep_dc -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYABORT; - - err = parse_events_add_pmu(_parse_state, list, $1, /*head_config=*/NULL, - /*auto_merge_stats=*/false); - free($1); - if (err < 0) { - free(list); - YYABORT; - } - $$ = list; -} -| -PE_PMU_EVENT_FAKE opt_pmu_config -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYABORT; - - err = parse_events_add_pmu(_parse_state, list, $1, $2, /*auto_merge_stats=*/false); - free($1); - parse_events_terms__delete($2); - if (err < 0) { - free(list); - YYABORT; - } - $$ = list; -} value_sym: PE_VALUE_SYM_HW -- cgit 
v1.2.3-70-g09d2 From bf7d46b3a088ccb8f8045c5902d5848bc23286f9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:19 -0700 Subject: perf parse-events: Remove unused PE_KERNEL_PMU_EVENT token Removed by commit 70c90e4a6b2f ("perf parse-events: Avoid scanning PMUs before parsing"). Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 64755f9cd600..4ee6c6865655 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -63,7 +63,6 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_LEGACY_CACHE %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_KERNEL_PMU_EVENT %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %token PE_TERM_HW @@ -81,7 +80,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type PE_MODIFIER_EVENT %type PE_MODIFIER_BP %type PE_EVENT_NAME -%type PE_KERNEL_PMU_EVENT %type PE_DRV_CFG_TERM %type name_or_raw name_or_legacy %destructor { free ($$); } @@ -358,18 +356,6 @@ PE_NAME opt_pmu_config #undef CLEANUP_YYABORT } | -PE_KERNEL_PMU_EVENT sep_dc -{ - struct list_head *list; - int err; - - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); - free($1); - if (err < 0) - YYABORT; - $$ = list; -} -| PE_NAME sep_dc { struct list_head *list; @@ -381,19 +367,6 @@ PE_NAME sep_dc YYABORT; $$ = list; } -| -PE_KERNEL_PMU_EVENT opt_pmu_config -{ - struct list_head *list; - int err; - - /* frees $2 */ - err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); - free($1); - if (err < 0) - YYABORT; - 
$$ = list; -} value_sym: PE_VALUE_SYM_HW -- cgit v1.2.3-70-g09d2 From 7e34daa55051f537036a70bd29b43a4cd4c55564 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:20 -0700 Subject: perf parse-events: Remove two unused tokens The tokens PE_PREFIX_RAW and PE_PREFIX_GROUP are unused so remove them. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 4ee6c6865655..b09a5fa92144 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -61,7 +61,7 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_BPF_OBJECT PE_BPF_SOURCE %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH %token PE_LEGACY_CACHE -%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP +%token PE_PREFIX_MEM %token PE_ERROR %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM -- cgit v1.2.3-70-g09d2 From 22881e2b458dc75e809fc7798f91f4814f3be65f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:21 -0700 Subject: perf parse-events: Add more comments to 'struct parse_events_state' Improve documentation of 'struct parse_events_state'. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b0eb95f93e9c..b37e5ee193a8 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -121,17 +121,25 @@ struct parse_events_error { }; struct parse_events_state { + /* The list parsed events are placed on. */ struct list_head list; + /* The updated index used by entries as they are added. */ int idx; + /* Error information. */ struct parse_events_error *error; + /* Used by BPF event creation. */ struct evlist *evlist; + /* Holds returned terms for term parsing. */ struct list_head *terms; + /* Start token. */ int stoken; + /* Special fake PMU marker for testing. */ struct perf_pmu *fake_pmu; /* If non-null, when wildcard matching only match the given PMU. */ const char *pmu_filter; /* Should PE_LEGACY_NAME tokens be generated for config terms? */ bool match_legacy_cache_terms; + /* Were multiple PMUs scanned to find events? */ bool wild_card_pmus; }; -- cgit v1.2.3-70-g09d2 From 93d7e9c8fbb4624b7dfe8b3605b9f10f192ede98 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:22 -0700 Subject: perf parse-events: Avoid regrouped warning for wild card events There is logic to avoid printing the regrouping warning for wild card PMUs, this logic also needs to apply for wild card events. 
Before: ``` $ perf stat -e '{data_read,data_write}' -a sleep 1 WARNING: events were regrouped to match PMUs Performance counter stats for 'system wide': 2,979.16 MiB data_read 410.26 MiB data_write 1.001541923 seconds time elapsed ``` After: ``` $ perf stat -e '{data_read,data_write}' -a sleep 1 Performance counter stats for 'system wide': 2,975.94 MiB data_read 432.05 MiB data_write 1.001119499 seconds time elapsed ``` Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index da29061ecf49..75778d5be5b6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1730,6 +1730,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, auto_merge_stats)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); + parse_state->wild_card_pmus = true; ok++; } parse_events_terms__delete(orig_head); -- cgit v1.2.3-70-g09d2 From 88cc47e24597971b05b6e94c28a2fc81d2a8d61a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 28 Jul 2023 17:26:54 -0300 Subject: perf build: Define YYNOMEM as YYABORT for bison < 3.81 YYNOMEM was introduced in bison 3.81, so define it as YYABORT for older versions, which should provide the previous perf behaviour. 
Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 96f4ea1d45c5..9c6c4475524b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -301,6 +301,12 @@ ifeq ($(BISON_GE_35),1) else bison_flags += -w endif + +BISON_LT_381 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 381) +ifeq ($(BISON_LT_381),1) + bison_flags += -DYYNOMEM=YYABORT +endif + CFLAGS_parse-events-bison.o += $(bison_flags) CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) -- cgit v1.2.3-70-g09d2 From 9462e4de62755c85867991a4beccff15377d0e95 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:23 -0700 Subject: perf parse-event: Add memory allocation test for name terms If the name memory allocation fails then propagate to the parser. Committer notes: Use $(BISON_FALLBACK_FLAGS) on the bison call so that we continue building with older bison versions, before 3.81, where YYNOMEM isn't present. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 2 +- tools/perf/util/parse-events.c | 5 ++++- tools/perf/util/parse-events.y | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 9c6c4475524b..bb08149179e4 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -246,7 +246,7 @@ $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse- $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \ + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) $(BISON_FALLBACK_FLAGS) \ -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75778d5be5b6..83adb0c2a6bc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1973,8 +1973,11 @@ int parse_events_name(struct list_head *list, const char *name) struct evsel *evsel; __evlist__for_each_entry(list, evsel) { - if (!evsel->name) + if (!evsel->name) { evsel->name = strdup(name); + if (!evsel->name) + return -ENOMEM; + } } return 0; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index b09a5fa92144..3ee351768433 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -263,7 +263,7 @@ PE_EVENT_NAME event_def free($1); if (err) { free_list_evsel($2); - YYABORT; + YYNOMEM; } $$ = $2; } -- cgit 
v1.2.3-70-g09d2 From a7a3252dad354a9e5c173156dab959e4019b9467 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:24 -0700 Subject: perf parse-events: Separate YYABORT and YYNOMEM cases Split cases in event_pmu for greater accuracy. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 45 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 3ee351768433..d22866b97b76 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -283,37 +283,42 @@ event_pmu: PE_NAME opt_pmu_config { struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL; + struct parse_events_error *error = parse_state->error; char *pattern = NULL; -#define CLEANUP_YYABORT \ +#define CLEANUP \ do { \ parse_events_terms__delete($2); \ parse_events_terms__delete(orig_terms); \ free(list); \ free($1); \ free(pattern); \ - YYABORT; \ } while(0) - if (parse_events_copy_term_list($2, &orig_terms)) - CLEANUP_YYABORT; - if (error) error->idx = @1.first_column; + if (parse_events_copy_term_list($2, &orig_terms)) { + CLEANUP; + YYNOMEM; + } + list = alloc_list(); - if (!list) - CLEANUP_YYABORT; + if (!list) { + CLEANUP; + YYNOMEM; + } /* Attempt to add to list assuming $1 is a PMU name. */ if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) { struct perf_pmu *pmu = NULL; int ok = 0; /* Failure to add, try wildcard expansion of $1 as a PMU name. 
*/ - if (asprintf(&pattern, "%s*", $1) < 0) - CLEANUP_YYABORT; + if (asprintf(&pattern, "%s*", $1) < 0) { + CLEANUP; + YYNOMEM; + } while ((pmu = perf_pmus__scan(pmu)) != NULL) { char *name = pmu->name; @@ -328,8 +333,10 @@ PE_NAME opt_pmu_config !perf_pmu__match(pattern, pmu->alias_name, $1)) { bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); - if (parse_events_copy_term_list(orig_terms, &terms)) - CLEANUP_YYABORT; + if (parse_events_copy_term_list(orig_terms, &terms)) { + CLEANUP; + YYNOMEM; + } if (!parse_events_add_pmu(parse_state, list, pmu->name, terms, auto_merge_stats)) { ok++; @@ -345,15 +352,15 @@ PE_NAME opt_pmu_config ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list); $2 = NULL; } - if (!ok) - CLEANUP_YYABORT; + if (!ok) { + CLEANUP; + YYABORT; + } } - parse_events_terms__delete($2); - parse_events_terms__delete(orig_terms); - free(pattern); - free($1); $$ = list; -#undef CLEANUP_YYABORT + list = NULL; + CLEANUP; +#undef CLEANUP } | PE_NAME sep_dc -- cgit v1.2.3-70-g09d2 From 77cdd787fc45e3426b8e0b5038b85c276540dfb4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:25 -0700 Subject: perf parse-events: Move instances of YYABORT to YYNOMEM Migration to improve error reporting as YYABORT cases should carry event parsing errors. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 58 +++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d22866b97b76..eaf43bd8fe3f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -390,7 +390,8 @@ value_sym '/' event_config '/' bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard); parse_events_terms__delete($3); if (err) { @@ -408,7 +409,8 @@ value_sym sep_slash_slash_dc bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard)); $$ = list; @@ -419,7 +421,8 @@ PE_VALUE_SYM_TOOL sep_slash_slash_dc struct list_head *list; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; ABORT_ON(parse_events_add_tool(_parse_state, list, $1)); $$ = list; } @@ -432,7 +435,9 @@ PE_LEGACY_CACHE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; + err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2); parse_events_terms__delete($2); @@ -451,7 +456,9 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; + err = parse_events_add_breakpoint(_parse_state, list, $2, $6, $4, $7); parse_events_terms__delete($7); @@ -469,7 
+476,9 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; + err = parse_events_add_breakpoint(_parse_state, list, $2, NULL, $4, $5); parse_events_terms__delete($5); @@ -486,7 +495,9 @@ PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; + err = parse_events_add_breakpoint(_parse_state, list, $2, $4, 0, $5); parse_events_terms__delete($5); @@ -504,7 +515,8 @@ PE_PREFIX_MEM PE_VALUE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; err = parse_events_add_breakpoint(_parse_state, list, $2, NULL, 0, $3); parse_events_terms__delete($3); @@ -524,7 +536,8 @@ tracepoint_name opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; if (error) error->idx = @1.first_column; @@ -556,7 +569,8 @@ PE_VALUE ':' PE_VALUE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4, /*wildcard=*/false); parse_events_terms__delete($4); @@ -575,7 +589,8 @@ PE_RAW opt_event_config u64 num; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; errno = 0; num = strtoull($1 + 1, NULL, 16); ABORT_ON(errno); @@ -598,7 +613,8 @@ PE_BPF_OBJECT opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; err = parse_events_load_bpf(parse_state, list, $1, false, $2); parse_events_terms__delete($2); free($1); @@ -615,7 +631,8 @@ PE_BPF_SOURCE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; err = parse_events_load_bpf(_parse_state, list, $1, true, $2); parse_events_terms__delete($2); if (err) { @@ -680,7 +697,8 @@ event_term struct list_head *head = malloc(sizeof(*head)); struct parse_events_term *term = $1; - ABORT_ON(!head); + if (!head) + YYNOMEM; INIT_LIST_HEAD(head); 
list_add_tail(&term->list, head); $$ = head; @@ -857,7 +875,8 @@ PE_DRV_CFG_TERM struct parse_events_term *term; char *config = strdup($1); - ABORT_ON(!config); + if (!config) + YYNOMEM; if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL)) { free($1); @@ -888,7 +907,8 @@ array_terms ',' array_term new_array.ranges = realloc($1.ranges, sizeof(new_array.ranges[0]) * new_array.nr_ranges); - ABORT_ON(!new_array.ranges); + if (!new_array.ranges) + YYNOMEM; memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges, $3.nr_ranges * sizeof(new_array.ranges[0])); free($3.ranges); @@ -904,7 +924,8 @@ PE_VALUE array.nr_ranges = 1; array.ranges = malloc(sizeof(array.ranges[0])); - ABORT_ON(!array.ranges); + if (!array.ranges) + YYNOMEM; array.ranges[0].start = $1; array.ranges[0].length = 1; $$ = array; @@ -917,7 +938,8 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE ABORT_ON($3 < $1); array.nr_ranges = 1; array.ranges = malloc(sizeof(array.ranges[0])); - ABORT_ON(!array.ranges); + if (!array.ranges) + YYNOMEM; array.ranges[0].start = $1; array.ranges[0].length = $3 - $1 + 1; $$ = array; -- cgit v1.2.3-70-g09d2 From b52cb995f1a559bc6e1a7cdc0ed0375503528541 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:26 -0700 Subject: perf parse-events: Separate ENOMEM memory handling Add PE_ABORT that will YYNOMEM or YYABORT accordingly. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 134 +++++++++++++++++++++++++---------------- 1 file changed, 82 insertions(+), 52 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index eaf43bd8fe3f..f090a85c4518 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -28,6 +28,13 @@ do { \ YYABORT; \ } while (0) +#define PE_ABORT(val) \ +do { \ + if (val == -ENOMEM) \ + YYNOMEM; \ + YYABORT; \ +} while (0) + static struct list_head* alloc_list(void) { struct list_head *list; @@ -371,7 +378,7 @@ PE_NAME sep_dc err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); free($1); if (err < 0) - YYABORT; + PE_ABORT(err); $$ = list; } @@ -396,7 +403,7 @@ value_sym '/' event_config '/' parse_events_terms__delete($3); if (err) { free_list_evsel(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -407,23 +414,28 @@ value_sym sep_slash_slash_dc int type = $1 >> 16; int config = $1 & 255; bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); + int err; list = alloc_list(); if (!list) YYNOMEM; - ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, - /*head_config=*/NULL, wildcard)); + err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard); + if (err) + PE_ABORT(err); $$ = list; } | PE_VALUE_SYM_TOOL sep_slash_slash_dc { struct list_head *list; + int err; list = alloc_list(); if (!list) YYNOMEM; - ABORT_ON(parse_events_add_tool(_parse_state, list, $1)); + err = parse_events_add_tool(_parse_state, list, $1); + if (err) + YYNOMEM; $$ = list; } @@ -444,7 +456,7 @@ PE_LEGACY_CACHE opt_event_config free($1); if (err) { 
free_list_evsel(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -465,7 +477,7 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event free($6); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -484,7 +496,7 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config parse_events_terms__delete($5); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -504,7 +516,7 @@ PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config free($4); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -522,7 +534,7 @@ PE_PREFIX_MEM PE_VALUE opt_event_config parse_events_terms__delete($3); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -549,7 +561,7 @@ tracepoint_name opt_event_config free($1.event); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -576,7 +588,7 @@ PE_VALUE ':' PE_VALUE opt_event_config parse_events_terms__delete($4); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -600,7 +612,7 @@ PE_RAW opt_event_config parse_events_terms__delete($2); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -620,7 +632,7 @@ PE_BPF_OBJECT opt_event_config free($1); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -637,7 +649,7 @@ PE_BPF_SOURCE opt_event_config parse_events_terms__delete($2); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -712,11 +724,12 @@ event_term: PE_RAW { struct parse_events_term *term; + int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW, + strdup("raw"), $1, &@1, &@1); - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW, - strdup("raw"), $1, &@1, &@1)) { + if (err) { free($1); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -724,12 +737,12 @@ PE_RAW name_or_raw '=' name_or_legacy { struct parse_events_term *term; + int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3, &@1, &@3); - if 
(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3, &@1, &@3)) { + if (err) { free($1); free($3); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -737,11 +750,12 @@ name_or_raw '=' name_or_legacy name_or_raw '=' PE_VALUE { struct parse_events_term *term; + int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $3, false, &@1, &@3); - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3, false, &@1, &@3)) { + if (err) { free($1); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -749,12 +763,13 @@ name_or_raw '=' PE_VALUE name_or_raw '=' PE_TERM_HW { struct parse_events_term *term; + int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $3.str, &@1, &@3); - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3.str, &@1, &@3)) { + if (err) { free($1); free($3.str); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -762,11 +777,12 @@ name_or_raw '=' PE_TERM_HW PE_LEGACY_CACHE { struct parse_events_term *term; + int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, + $1, 1, true, &@1, NULL); - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, - $1, 1, true, &@1, NULL)) { + if (err) { free($1); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -774,11 +790,12 @@ PE_LEGACY_CACHE PE_NAME { struct parse_events_term *term; + int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, 1, true, &@1, NULL); - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1, true, &@1, NULL)) { + if (err) { free($1); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -786,11 +803,12 @@ PE_NAME PE_TERM_HW { struct parse_events_term *term; + int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE, + $1.str, $1.num & 255, false, &@1, NULL); - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE, - $1.str, $1.num & 255, false, &@1, NULL)) { + if (err) { free($1.str); - YYABORT; + PE_ABORT(err); } $$ = 
term; } @@ -798,10 +816,11 @@ PE_TERM_HW PE_TERM '=' name_or_legacy { struct parse_events_term *term; + int err = parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3); - if (parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)) { + if (err) { free($3); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -809,10 +828,11 @@ PE_TERM '=' name_or_legacy PE_TERM '=' PE_TERM_HW { struct parse_events_term *term; + int err = parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3); - if (parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3)) { + if (err) { free($3.str); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -820,37 +840,46 @@ PE_TERM '=' PE_TERM_HW PE_TERM '=' PE_TERM { struct parse_events_term *term; + int err = parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3); + + if (err) + PE_ABORT(err); - ABORT_ON(parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3)); $$ = term; } | PE_TERM '=' PE_VALUE { struct parse_events_term *term; + int err = parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3); + + if (err) + PE_ABORT(err); - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3)); $$ = term; } | PE_TERM { struct parse_events_term *term; + int err = parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL); + + if (err) + PE_ABORT(err); - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL)); $$ = term; } | name_or_raw array '=' name_or_legacy { struct parse_events_term *term; + int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, &@1, &@4); - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $4, &@1, &@4)) { + if (err) { free($1); free($4); free($2.ranges); - YYABORT; + PE_ABORT(err); } term->array = $2; $$ = term; @@ -859,12 +888,12 @@ name_or_raw array '=' name_or_legacy name_or_raw array '=' PE_VALUE { struct parse_events_term *term; + int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, false, &@1, 
&@4); - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $4, false, &@1, &@4)) { + if (err) { free($1); free($2.ranges); - YYABORT; + PE_ABORT(err); } term->array = $2; $$ = term; @@ -874,14 +903,15 @@ PE_DRV_CFG_TERM { struct parse_events_term *term; char *config = strdup($1); + int err; if (!config) YYNOMEM; - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, - config, $1, &@1, NULL)) { + err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL); + if (err) { free($1); free(config); - YYABORT; + PE_ABORT(err); } $$ = term; } -- cgit v1.2.3-70-g09d2 From b30d4f0b695428f513c561eeaea52e042ef48550 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:27 -0700 Subject: perf parse-events: Additional error reporting When no events or PMUs match report an error for event_pmu: Before: ``` $ perf stat -e 'asdfasdf' -a sleep 1 Run 'perf list' for a list of valid events Usage: perf stat [<options>] [<command>] -e, --event <event> event selector. use 'perf list' to list available events ``` After: ``` $ perf stat -e 'asdfasdf' -a sleep 1 event syntax error: 'asdfasdf' \___ Bad event name Unabled to find PMU or event on a PMU of 'asdfasdf' Run 'perf list' for a list of valid events Usage: perf stat [<options>] [<command>] -e, --event <event> event selector. use 'perf list' to list available events ``` Fixes the inadvertent removal when hybrid parsing was modified.
Fixes: 70c90e4a6b2fbe77 ("perf parse-events: Avoid scanning PMUs before parsing") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index f090a85c4518..a636a7db6e6f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -291,7 +291,6 @@ PE_NAME opt_pmu_config { struct parse_events_state *parse_state = _parse_state; struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL; - struct parse_events_error *error = parse_state->error; char *pattern = NULL; #define CLEANUP \ @@ -303,9 +302,6 @@ PE_NAME opt_pmu_config free(pattern); \ } while(0) - if (error) - error->idx = @1.first_column; - if (parse_events_copy_term_list($2, &orig_terms)) { CLEANUP; YYNOMEM; @@ -360,6 +356,14 @@ PE_NAME opt_pmu_config $2 = NULL; } if (!ok) { + struct parse_events_error *error = parse_state->error; + char *help; + + if (asprintf(&help, "Unabled to find PMU or event on a PMU of '%s'", $1) < 0) + help = NULL; + parse_events_error__handle(error, @1.first_column, + strdup("Bad event or PMU"), + help); CLEANUP; YYABORT; } @@ -376,9 +380,18 @@ PE_NAME sep_dc int err; err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); - free($1); - if (err < 0) + if (err < 0) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + char *help; + + if (asprintf(&help, "Unabled to find PMU or event on a PMU of '%s'", $1) < 0) + help = NULL; + parse_events_error__handle(error, @1.first_column, strdup("Bad event name"), help); + free($1); 
PE_ABORT(err); + } + free($1); $$ = list; } -- cgit v1.2.3-70-g09d2 From d81fa63b09fbd8b6ae5761164ed75f9ccf005893 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:28 -0700 Subject: perf parse-events: Populate error column for BPF/tracepoint events Follow convention from parse_events_terms__num/str and pass the YYLTYPE for the location. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 +- tools/perf/util/parse-events.c | 80 +++++++++++++++++++++++++----------------- tools/perf/util/parse-events.h | 8 +++-- tools/perf/util/parse-events.y | 6 ++-- 4 files changed, 57 insertions(+), 39 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 8beb46066034..31796f2a80f4 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -124,7 +124,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), parse_state.error = &parse_error; INIT_LIST_HEAD(&parse_state.list); - err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL); + err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL, NULL); parse_events_error__exit(&parse_error); if (err == -ENODATA) { pr_debug("Failed to add events selected by BPF, debuginfo package not installed\n"); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 83adb0c2a6bc..7c13b70e743c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -499,7 +499,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, #ifdef HAVE_LIBTRACEEVENT static void tracepoint_error(struct parse_events_error *e, int err, - const char *sys, const char *name) + const char *sys, const char 
*name, int column) { const char *str; char help[BUFSIZ]; @@ -526,18 +526,19 @@ static void tracepoint_error(struct parse_events_error *e, int err, } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - parse_events_error__handle(e, 0, strdup(str), strdup(help)); + parse_events_error__handle(e, column, strdup(str), strdup(help)); } static int add_tracepoint(struct list_head *list, int *idx, const char *sys_name, const char *evt_name, struct parse_events_error *err, - struct list_head *head_config) + struct list_head *head_config, void *loc_) { + YYLTYPE *loc = loc_; struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++); if (IS_ERR(evsel)) { - tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name); + tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name, loc->first_column); return PTR_ERR(evsel); } @@ -556,7 +557,7 @@ static int add_tracepoint(struct list_head *list, int *idx, static int add_tracepoint_multi_event(struct list_head *list, int *idx, const char *sys_name, const char *evt_name, struct parse_events_error *err, - struct list_head *head_config) + struct list_head *head_config, YYLTYPE *loc) { char *evt_path; struct dirent *evt_ent; @@ -565,13 +566,13 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, evt_path = get_events_file(sys_name); if (!evt_path) { - tracepoint_error(err, errno, sys_name, evt_name); + tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); return -1; } evt_dir = opendir(evt_path); if (!evt_dir) { put_events_file(evt_path); - tracepoint_error(err, errno, sys_name, evt_name); + tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); return -1; } @@ -588,11 +589,11 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, found++; ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, - err, head_config); + err, head_config, loc); } if (!found) { - tracepoint_error(err, ENOENT, sys_name, evt_name); + tracepoint_error(err, ENOENT, 
sys_name, evt_name, loc->first_column); ret = -1; } @@ -604,19 +605,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, static int add_tracepoint_event(struct list_head *list, int *idx, const char *sys_name, const char *evt_name, struct parse_events_error *err, - struct list_head *head_config) + struct list_head *head_config, YYLTYPE *loc) { return strpbrk(evt_name, "*?") ? - add_tracepoint_multi_event(list, idx, sys_name, evt_name, - err, head_config) : - add_tracepoint(list, idx, sys_name, evt_name, - err, head_config); + add_tracepoint_multi_event(list, idx, sys_name, evt_name, + err, head_config, loc) : + add_tracepoint(list, idx, sys_name, evt_name, + err, head_config, loc); } static int add_tracepoint_multi_sys(struct list_head *list, int *idx, const char *sys_name, const char *evt_name, struct parse_events_error *err, - struct list_head *head_config) + struct list_head *head_config, YYLTYPE *loc) { struct dirent *events_ent; DIR *events_dir; @@ -624,7 +625,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, events_dir = tracing_events__opendir(); if (!events_dir) { - tracepoint_error(err, errno, sys_name, evt_name); + tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); return -1; } @@ -640,7 +641,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, continue; ret = add_tracepoint_event(list, idx, events_ent->d_name, - evt_name, err, head_config); + evt_name, err, head_config, loc); } closedir(events_dir); @@ -653,6 +654,7 @@ struct __add_bpf_event_param { struct parse_events_state *parse_state; struct list_head *list; struct list_head *head_config; + YYLTYPE *loc; }; static int add_bpf_event(const char *group, const char *event, int fd, struct bpf_object *obj, @@ -679,7 +681,7 @@ static int add_bpf_event(const char *group, const char *event, int fd, struct bp err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group, event, parse_state->error, - 
param->head_config); + param->head_config, param->loc); if (err) { struct evsel *evsel, *tmp; @@ -706,12 +708,14 @@ static int add_bpf_event(const char *group, const char *event, int fd, struct bp int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct list_head *list, struct bpf_object *obj, - struct list_head *head_config) + struct list_head *head_config, + void *loc) { int err; char errbuf[BUFSIZ]; - struct __add_bpf_event_param param = {parse_state, list, head_config}; + struct __add_bpf_event_param param = {parse_state, list, head_config, loc}; static bool registered_unprobe_atexit = false; + YYLTYPE test_loc = {.first_column = -1}; if (IS_ERR(obj) || !obj) { snprintf(errbuf, sizeof(errbuf), @@ -742,6 +746,9 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, goto errout; } + if (!param.loc) + param.loc = &test_loc; + err = bpf__foreach_event(obj, add_bpf_event, &param); if (err) { snprintf(errbuf, sizeof(errbuf), @@ -751,7 +758,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_events_error__handle(parse_state->error, 0, + parse_events_error__handle(parse_state->error, param.loc->first_column, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -839,11 +846,13 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, struct list_head *list, char *bpf_file_name, bool source, - struct list_head *head_config) + struct list_head *head_config, + void *loc_) { int err; struct bpf_object *obj; LIST_HEAD(obj_head_config); + YYLTYPE *loc = loc_; if (head_config) split_bpf_config_terms(head_config, &obj_head_config); @@ -863,12 +872,12 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, -err, errbuf, sizeof(errbuf)); - parse_events_error__handle(parse_state->error, 0, + parse_events_error__handle(parse_state->error, loc->first_column, strdup(errbuf), strdup("(add -v to see detail)")); return err; } - err =
parse_events_load_bpf_obj(parse_state, list, obj, head_config); + err = parse_events_load_bpf_obj(parse_state, list, obj, head_config, loc); if (err) return err; err = parse_events_config_bpf(parse_state, obj, &obj_head_config); @@ -885,9 +894,12 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct list_head *list __maybe_unused, struct bpf_object *obj __maybe_unused, - struct list_head *head_config __maybe_unused) + struct list_head *head_config __maybe_unused, + void *loc_) { - parse_events_error__handle(parse_state->error, 0, + YYLTYPE *loc = loc_; + + parse_events_error__handle(parse_state->error, loc->first_column, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -897,9 +909,12 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, struct list_head *list __maybe_unused, char *bpf_file_name __maybe_unused, bool source __maybe_unused, - struct list_head *head_config __maybe_unused) + struct list_head *head_config __maybe_unused, + void *loc_) { - parse_events_error__handle(parse_state->error, 0, + YYLTYPE *loc = loc_; + + parse_events_error__handle(parse_state->error, loc->first_column, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -1441,8 +1456,9 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *err, - struct list_head *head_config) + struct list_head *head_config, void *loc_) { + YYLTYPE *loc = loc_; #ifdef HAVE_LIBTRACEEVENT if (head_config) { struct perf_event_attr attr; @@ -1454,17 +1470,17 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, if (strpbrk(sys, "*?")) return add_tracepoint_multi_sys(list, idx, sys, event, - err, 
head_config); + err, head_config, loc); else return add_tracepoint_event(list, idx, sys, event, - err, head_config); + err, head_config, loc); #else (void)list; (void)idx; (void)sys; (void)event; (void)head_config; - parse_events_error__handle(err, 0, strdup("unsupported tracepoint"), + parse_events_error__handle(err, loc->first_column, strdup("unsupported tracepoint"), strdup("libtraceevent is necessary for tracepoint support")); return -1; #endif diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b37e5ee193a8..cabbe70adb82 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -169,18 +169,20 @@ int parse_events_name(struct list_head *list, const char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, - struct list_head *head_config); + struct list_head *head_config, void *loc); int parse_events_load_bpf(struct parse_events_state *parse_state, struct list_head *list, char *bpf_file_name, bool source, - struct list_head *head_config); + struct list_head *head_config, + void *loc); /* Provide this function for perf test */ struct bpf_object; int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct list_head *list, struct bpf_object *obj, - struct list_head *head_config); + struct list_head *head_config, + void *loc); int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index a636a7db6e6f..50f5b819de37 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -567,7 +567,7 @@ tracepoint_name opt_event_config error->idx = @1.first_column; err = parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event, - error, $2); + error, $2, &@1); parse_events_terms__delete($2); free($1.sys); @@ -640,7 +640,7 @@ PE_BPF_OBJECT 
opt_event_config list = alloc_list(); if (!list) YYNOMEM; - err = parse_events_load_bpf(parse_state, list, $1, false, $2); + err = parse_events_load_bpf(parse_state, list, $1, false, $2, &@1); parse_events_terms__delete($2); free($1); if (err) { @@ -658,7 +658,7 @@ PE_BPF_SOURCE opt_event_config list = alloc_list(); if (!list) YYNOMEM; - err = parse_events_load_bpf(_parse_state, list, $1, true, $2); + err = parse_events_load_bpf(_parse_state, list, $1, true, $2, &@1); parse_events_terms__delete($2); if (err) { free(list); -- cgit v1.2.3-70-g09d2 From 81a4e31f8c4a50bc5c5f49a1e2e4a295edd57719 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:29 -0700 Subject: perf parse-events: Improve location for add pmu Improve the location for add PMU for cases when PMUs aren't found. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 12 +++++++----- tools/perf/util/parse-events.h | 4 ++-- tools/perf/util/parse-events.y | 8 ++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7c13b70e743c..926d3ac97324 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1575,13 +1575,14 @@ static bool config_term_percore(struct list_head *config_terms) int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, - bool auto_merge_stats) + bool auto_merge_stats, void *loc_) { struct perf_event_attr attr; struct perf_pmu_info info; struct perf_pmu *pmu; struct evsel *evsel; struct parse_events_error *err = parse_state->error; + YYLTYPE *loc = loc_; LIST_HEAD(config_terms); pmu = 
parse_state->fake_pmu ?: perf_pmus__find(name); @@ -1605,7 +1606,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (asprintf(&err_str, "Cannot find PMU `%s'. Missing kernel support?", name) >= 0) - parse_events_error__handle(err, 0, err_str, NULL); + parse_events_error__handle(err, loc->first_column, err_str, NULL); return -EINVAL; } if (head_config) @@ -1691,12 +1692,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, struct list_head *head, - struct list_head **listp) + struct list_head **listp, void *loc_) { struct parse_events_term *term; struct list_head *list = NULL; struct list_head *orig_head = NULL; struct perf_pmu *pmu = NULL; + YYLTYPE *loc = loc_; int ok = 0; char *config; @@ -1743,7 +1745,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, parse_events_copy_term_list(head, &orig_head); if (!parse_events_add_pmu(parse_state, list, pmu->name, orig_head, - auto_merge_stats)) { + auto_merge_stats, loc)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); parse_state->wild_card_pmus = true; @@ -1756,7 +1758,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, list, str, head, - /*auto_merge_stats=*/true)) { + /*auto_merge_stats=*/true, loc)) { pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str); ok++; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index cabbe70adb82..e59b33805886 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -202,7 +202,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, - bool auto_merge_stats); + bool auto_merge_stats, void *loc); struct evsel *parse_events__add_event(int idx, 
struct perf_event_attr *attr, const char *name, const char *metric_id, @@ -211,7 +211,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, struct list_head *head_config, - struct list_head **listp); + struct list_head **listp, void *loc); int parse_events_copy_term_list(struct list_head *old, struct list_head **new); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 50f5b819de37..844646752462 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -313,7 +313,7 @@ PE_NAME opt_pmu_config YYNOMEM; } /* Attempt to add to list assuming $1 is a PMU name. */ - if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) { + if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false, &@1)) { struct perf_pmu *pmu = NULL; int ok = 0; @@ -341,7 +341,7 @@ PE_NAME opt_pmu_config YYNOMEM; } if (!parse_events_add_pmu(parse_state, list, pmu->name, terms, - auto_merge_stats)) { + auto_merge_stats, &@1)) { ok++; parse_state->wild_card_pmus = true; } @@ -352,7 +352,7 @@ PE_NAME opt_pmu_config if (!ok) { /* Failure to add, assume $1 is an event name. 
*/ zfree(&list); - ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list); + ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list, &@1); $2 = NULL; } if (!ok) { @@ -379,7 +379,7 @@ PE_NAME sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); + err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1); if (err < 0) { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; -- cgit v1.2.3-70-g09d2 From 4c11adff675652759a0f0ad2194f4646b5463a42 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jun 2023 11:10:30 -0700 Subject: perf parse-events: Remove ABORT_ON Prefer informative messages rather than none with ABORT_ON. Document one failure mode and add an error message for another. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230627181030.95608-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 844646752462..454577f7aff6 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -22,12 +22,6 @@ void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg); -#define ABORT_ON(val) \ -do { \ - if (val) \ - YYABORT; \ -} while (0) - #define PE_ABORT(val) \ do { \ if (val == -ENOMEM) \ @@ -618,7 +612,9 @@ PE_RAW opt_event_config YYNOMEM; errno = 0; num = strtoull($1 + 1, NULL, 16); - ABORT_ON(errno); + /* Given the lexer will only give [a-fA-F0-9]+ a failure here should be impossible. 
*/ + if (errno) + YYABORT; free($1); err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2, /*wildcard=*/false); @@ -978,7 +974,17 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE { struct parse_events_array array; - ABORT_ON($3 < $1); + if ($3 < $1) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + char *err_str; + + if (asprintf(&err_str, "Expected '%ld' to be less-than '%ld'", $3, $1) < 0) + err_str = NULL; + + parse_events_error__handle(error, @1.first_column, err_str, NULL); + YYABORT; + } array.nr_ranges = 1; array.ranges = malloc(sizeof(array.ranges[0])); if (!array.ranges) -- cgit v1.2.3-70-g09d2 From f9dd531c5b82037f0f8f9ffc04ce0c09840fba2e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Jul 2023 17:19:28 -0700 Subject: perf symbols: Add kallsyms__get_symbol_start() The kallsyms__get_symbol_start() to get any symbol address from kallsyms. The existing kallsyms__get_function_start() only allows text symbols so create this to allow data symbols too. 
Signed-off-by: Namhyung Kim Acked-by: Adrian Hunter Acked-by: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230725001929.368041-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 30 +++++++++++++++++++++++++++--- tools/perf/util/event.h | 2 ++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4cbb092e0684..923c0fb15122 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -93,8 +93,8 @@ struct process_symbol_args { u64 start; }; -static int find_symbol_cb(void *arg, const char *name, char type, - u64 start) +static int find_func_symbol_cb(void *arg, const char *name, char type, + u64 start) { struct process_symbol_args *args = arg; @@ -110,12 +110,36 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } +static int find_any_symbol_cb(void *arg, const char *name, + char type __maybe_unused, u64 start) +{ + struct process_symbol_args *args = arg; + + if (strcmp(name, args->name)) + return 0; + + args->start = start; + return 1; +} + int kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name, u64 *addr) { struct process_symbol_args args = { .name = symbol_name, }; - if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0) + if (kallsyms__parse(kallsyms_filename, &args, find_func_symbol_cb) <= 0) + return -1; + + *addr = args.start; + return 0; +} + +int kallsyms__get_symbol_start(const char *kallsyms_filename, + const char *symbol_name, u64 *addr) +{ + struct process_symbol_args args = { .name = symbol_name, }; + + if (kallsyms__parse(kallsyms_filename, &args, find_any_symbol_cb) <= 0) return -1; *addr = args.start; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index de20e01c9d72..d8bcee2e9b93 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -360,6 +360,8 @@ size_t perf_event__fprintf(union 
perf_event *event, struct machine *machine, FIL int kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name, u64 *addr); +int kallsyms__get_symbol_start(const char *kallsyms_filename, + const char *symbol_name, u64 *addr); void event_attr_init(struct perf_event_attr *attr); -- cgit v1.2.3-70-g09d2 From 69a87a32f5cd8b262cb2195b045f96c63aede734 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 24 Jul 2023 17:19:29 -0700 Subject: perf machine: Include data symbols in the kernel map When 'perf record -d' is used, it needs data mmaps to symbolize global data. But it missed to collect kernel data maps so it cannot symbolize them. Instead of having a separate map, just increase the kernel map size to include the data section. Probably we can have a separate kernel map for data, but the current code assumes a single kernel map. So it'd require more changes in other places and looks error-prone. I decided not to go that way for now. Also it seems the kernel module size already includes the data section. For example, my system has the following. $ grep -e _stext -e _etext -e _edata /proc/kallsyms ffffffff99800000 T _stext ffffffff9a601ac8 T _etext ffffffff9b446a00 D _edata Size of the text section is (0x9a601ac8 - 0x99800000 = 0xe01ac8) and size including data section is (0x9b446a00 - 0x99800000 = 0x1c46a00). Before: $ perf record -d true $ perf report -D | grep MMAP | head -1 0 0 0x460 [0x60]: PERF_RECORD_MMAP -1/0: [0xffffffff99800000(0xe01ac8) @ 0xffffffff99800000]: x [kernel.kallsyms]_text ^^^^^^^^ here After: $ perf report -D | grep MMAP | head -1 0 0 0x460 [0x60]: PERF_RECORD_MMAP -1/0: [0xffffffff99800000(0x1c46a00) @ 0xffffffff99800000]: x [kernel.kallsyms]_text ^^^^^^^^^ Instead of just replacing it to _edata, try _edata first and then fall back to _etext just in case. 
Signed-off-by: Namhyung Kim Acked-by: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230725001929.368041-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 4e62843d51b7..11de3ca8d4fa 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1216,7 +1216,9 @@ static int machine__get_running_kernel_start(struct machine *machine, *start = addr; - err = kallsyms__get_function_start(filename, "_etext", &addr); + err = kallsyms__get_symbol_start(filename, "_edata", &addr); + if (err) + err = kallsyms__get_function_start(filename, "_etext", &addr); if (!err) *end = addr; -- cgit v1.2.3-70-g09d2 From 1e37201405590eac6491f9e677bd4064a92900e8 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Thu, 27 Jul 2023 10:50:01 +0000 Subject: perf doc: Fix typo in perf.data-file-format.txt The 'it' should be 'is' here, fix it. 
Signed-off-by: Xiu Jianfeng Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230727105001.261420-1-xiujianfeng@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 635ba043fd7d..010a4edcd384 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -43,7 +43,7 @@ struct perf_file_section { Flags section: -For each of the optional features a perf_file_section it placed after the data +For each of the optional features a perf_file_section is placed after the data section if the feature bit is set in the perf_header flags bitset. The respective perf_file_section points to the data of the additional header and defines its size. -- cgit v1.2.3-70-g09d2 From 1699d3efe111e33e275ca7d4163c8b1470ba79b3 Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Fri, 21 Jul 2023 23:22:28 +0530 Subject: perf scripts python: Add initial script file with usage information Added necessary modules, including the Perf-Trace-Util library, and defined the required functions and variables for using perf script python. The perf_trace_context and Core modules for tracing and processing events have also been imported. Added usage information.
Signed-off-by: Anup Sharma Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/f2f1a62f1cc69f44a5414da46a26a4cf124d2744.1689961706.git.anupnewsmail@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/gecko.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tools/perf/scripts/python/gecko.py diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py new file mode 100644 index 000000000000..7a62c1b411d9 --- /dev/null +++ b/tools/perf/scripts/python/gecko.py @@ -0,0 +1,31 @@ +# firefox-gecko-converter.py - Convert perf record output to Firefox's gecko profile format +# SPDX-License-Identifier: GPL-2.0 +# +# The script converts perf.data to Gecko Profile Format, +# which can be read by https://profiler.firefox.com/. +# +# Usage: +# +# perf record -a -g -F 99 sleep 60 +# perf script report gecko > output.json + +import os +import sys +from typing import Dict + +# Add the Perf-Trace-Util library to the Python path +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +# Uses perf script python interface to parse each +# event and store the data in the thread builder. +def process_event(param_dict: Dict) -> None: + pass + +# Trace_end runs at the end and will be used to aggregate +# the data into the final json object and print it out to stdout. +def trace_end() -> None: + pass -- cgit v1.2.3-70-g09d2 From 0a02e44cc2fe1657af1f2740cb9a1dcd8a9338cc Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Fri, 21 Jul 2023 23:22:56 +0530 Subject: perf scripts python: Extract necessary information from process event The script takes in a sample event dictionary (param_dict) and retrieves relevant data such as time stamp, PID, TID, and comm for each event.
Also start time is defined as a global variable as it need to be passed to trace_end for gecko meta information field creation. Signed-off-by: Anup Sharma Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/19910fefcfe4be03cd5c2aa3fec11d3f86c0381b.1689961706.git.anupnewsmail@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/gecko.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py index 7a62c1b411d9..a02b1e04ff52 100644 --- a/tools/perf/scripts/python/gecko.py +++ b/tools/perf/scripts/python/gecko.py @@ -20,10 +20,22 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ from perf_trace_context import * from Core import * +# start_time is intialiazed only once for the all event traces. +start_time = None + # Uses perf script python interface to parse each # event and store the data in the thread builder. def process_event(param_dict: Dict) -> None: - pass + global start_time + global tid_to_thread + time_stamp = (param_dict['sample']['time'] // 1000) / 1000 + pid = param_dict['sample']['pid'] + tid = param_dict['sample']['tid'] + comm = param_dict['comm'] + + # Start time is the time of the first sample + if not start_time: + start_time = time_stamp # Trace_end runs at the end and will be used to aggregate # the data into the final json object and print it out to stdout. -- cgit v1.2.3-70-g09d2 From 5aacd7f08a3276f4fad729a600d51a1cc5d5191a Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Fri, 21 Jul 2023 23:23:19 +0530 Subject: perf scripts python: Add classes and conversion functions This commit introduces new classes and conversion functions to facilitate the representation of Gecko profile information. 
The new classes Frame, Stack, Sample, and Thread are added to handle specific components of the profile data, also link to the origin docs has been commented out. Additionally, Inside the Thread class _to_json_dict() method has been created that converts the current thread data into the corresponding format expected by the GeckoThread JSON schema, as per the Gecko profile format specification. Signed-off-by: Anup Sharma Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/ab7b40bd32df7101a6f8b4a3aa41570b63b831ac.1689961706.git.anupnewsmail@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/gecko.py | 133 ++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py index a02b1e04ff52..2cd5cd6e31e3 100644 --- a/tools/perf/scripts/python/gecko.py +++ b/tools/perf/scripts/python/gecko.py @@ -11,7 +11,8 @@ import os import sys -from typing import Dict +from dataclasses import dataclass, field +from typing import List, Dict, Optional, NamedTuple, Set, Tuple, Any # Add the Perf-Trace-Util library to the Python path sys.path.append(os.environ['PERF_EXEC_PATH'] + \ @@ -20,9 +21,139 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ from perf_trace_context import * from Core import * +StringID = int +StackID = int +FrameID = int +CategoryID = int +Milliseconds = float + # start_time is intialiazed only once for the all event traces. 
start_time = None +# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 +class Frame(NamedTuple): + string_id: StringID + relevantForJS: bool + innerWindowID: int + implementation: None + optimizations: None + line: None + column: None + category: CategoryID + subcategory: int + +# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216 +class Stack(NamedTuple): + prefix_id: Optional[StackID] + frame_id: FrameID + +# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90 +class Sample(NamedTuple): + stack_id: Optional[StackID] + time_ms: Milliseconds + responsiveness: int + +@dataclass +class Thread: + """A builder for a profile of the thread. + + Attributes: + comm: Thread command-line (name). + pid: process ID of containing process. + tid: thread ID. + samples: Timeline of profile samples. + frameTable: interned stack frame ID -> stack frame. + stringTable: interned string ID -> string. + stringMap: interned string -> string ID. + stackTable: interned stack ID -> stack. + stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID. + frameMap: Stack Frame string -> interned Frame ID. 
+ comm: str + pid: int + tid: int + samples: List[Sample] = field(default_factory=list) + frameTable: List[Frame] = field(default_factory=list) + stringTable: List[str] = field(default_factory=list) + stringMap: Dict[str, int] = field(default_factory=dict) + stackTable: List[Stack] = field(default_factory=list) + stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict) + frameMap: Dict[str, int] = field(default_factory=dict) + """ + comm: str + pid: int + tid: int + samples: List[Sample] = field(default_factory=list) + frameTable: List[Frame] = field(default_factory=list) + stringTable: List[str] = field(default_factory=list) + stringMap: Dict[str, int] = field(default_factory=dict) + stackTable: List[Stack] = field(default_factory=list) + stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict) + frameMap: Dict[str, int] = field(default_factory=dict) + + def _to_json_dict(self) -> Dict: + """Converts current Thread to GeckoThread JSON format.""" + # Gecko profile format is row-oriented data as List[List], + # And a schema for interpreting each index. 
+ # Schema: + # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230 + return { + "tid": self.tid, + "pid": self.pid, + "name": self.comm, + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51 + "markers": { + "schema": { + "name": 0, + "startTime": 1, + "endTime": 2, + "phase": 3, + "category": 4, + "data": 5, + }, + "data": [], + }, + + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90 + "samples": { + "schema": { + "stack": 0, + "time": 1, + "responsiveness": 2, + }, + "data": self.samples + }, + + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 + "frameTable": { + "schema": { + "location": 0, + "relevantForJS": 1, + "innerWindowID": 2, + "implementation": 3, + "optimizations": 4, + "line": 5, + "column": 6, + "category": 7, + "subcategory": 8, + }, + "data": self.frameTable, + }, + + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216 + "stackTable": { + "schema": { + "prefix": 0, + "frame": 1, + }, + "data": self.stackTable, + }, + "stringTable": self.stringTable, + "registerTime": 0, + "unregisterTime": None, + "processType": "default", + } + # Uses perf script python interface to parse each # event and store the data in the thread builder. 
def process_event(param_dict: Dict) -> None: -- cgit v1.2.3-70-g09d2 From 833daec7e6cfda0d6c30a94c21b6706ff094cd45 Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Fri, 21 Jul 2023 23:24:42 +0530 Subject: perf scripts python: Add trace end processing and PRODUCT and CATEGORIES information The final output will now be presented in JSON format following the Gecko profile structure. Additionally, the inclusion of PRODUCT allows easy retrieval of header information for UI. Furthermore, CATEGORIES have been introduced to enable customization of kernel and user colors using input arguments. To facilitate this functionality, an argparse-based parser has been implemented. Note: The implementation of threads will be addressed in subsequent commits for now I have commented it out. Signed-off-by: Anup Sharma Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/fa6d027e4134c48e8a2ea45dd8f6b21e6a3418e4.1689961706.git.anupnewsmail@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/gecko.py | 65 +++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py index 2cd5cd6e31e3..794a91bec464 100644 --- a/tools/perf/scripts/python/gecko.py +++ b/tools/perf/scripts/python/gecko.py @@ -11,6 +11,8 @@ import os import sys +import json +import argparse from dataclasses import dataclass, field from typing import List, Dict, Optional, NamedTuple, Set, Tuple, Any @@ -30,6 +32,13 @@ Milliseconds = float # start_time is intialiazed only once for the all event traces. start_time = None +# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425 +# Follow Brendan Gregg's Flamegraph convention: orange for kernel and yellow for user space by default. 
+CATEGORIES = None + +# The product name is used by the profiler UI to show the Operating system and Processor. +PRODUCT = os.popen('uname -op').read().strip() + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 class Frame(NamedTuple): string_id: StringID @@ -171,4 +180,58 @@ def process_event(param_dict: Dict) -> None: # Trace_end runs at the end and will be used to aggregate # the data into the final json object and print it out to stdout. def trace_end() -> None: - pass + # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305 + gecko_profile_with_meta = { + "meta": { + "interval": 1, + "processType": 0, + "product": PRODUCT, + "stackwalk": 1, + "debug": 0, + "gcpoison": 0, + "asyncstack": 1, + "startTime": start_time, + "shutdownTime": None, + "version": 24, + "presymbolicated": True, + "categories": CATEGORIES, + "markerSchema": [], + }, + "libs": [], + # threads will be implemented in later commits. 
+ # "threads": threads, + "processes": [], + "pausedRanges": [], + } + json.dump(gecko_profile_with_meta, sys.stdout, indent=2) + +def main() -> None: + global CATEGORIES + parser = argparse.ArgumentParser(description="Convert perf.data to Firefox\'s Gecko Profile format") + + # Add the command-line options + # Colors must be defined according to this: + # https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css + parser.add_argument('--user-color', default='yellow', help='Color for the User category') + parser.add_argument('--kernel-color', default='orange', help='Color for the Kernel category') + # Parse the command-line arguments + args = parser.parse_args() + # Access the values provided by the user + user_color = args.user_color + kernel_color = args.kernel_color + + CATEGORIES = [ + { + "name": 'User', + "color": user_color, + "subcategories": ['Other'] + }, + { + "name": 'Kernel', + "color": kernel_color, + "subcategories": ['Other'] + }, + ] + +if __name__ == '__main__': + main() -- cgit v1.2.3-70-g09d2 From 258dfd41c1df2030772ceab378f7bd6f0f78b938 Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Fri, 21 Jul 2023 23:25:38 +0530 Subject: perf scripts python: Implement add sample function and thread processing The intern_stack function is responsible for retrieving or creating a stack_id based on the provided frame_id and prefix_id. It first generates a key using the frame_id and prefix_id values. If the stack corresponding to the key is found in the stackMap, it is returned. Otherwise, a new stack is created by appending the prefix_id and frame_id to the stackTable. The key and the index of the newly created stack are added to the stackMap for future reference. The _intern_frame function is responsible for retrieving or creating a frame_id based on the provided frame string. If the frame_id corresponding to the frameString is found in the frameMap, it is returned. 
Otherwise, a new frame is created by appending relevant information to the frameTable and adding the frameString to the string_id through _intern_string. The _intern_string function gets a matching string, or saves the new string, and returns a String ID. Signed-off-by: Anup Sharma Link: https://lore.kernel.org/r/4442f4b1ab4c7317cf940560a3a285fcdfbeeb08.1689961706.git.anupnewsmail@gmail.com Cc: Mark Rutland Cc: Ian Rogers Cc: Peter Zijlstra Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/gecko.py | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py index 794a91bec464..97949249a3c0 100644 --- a/tools/perf/scripts/python/gecko.py +++ b/tools/perf/scripts/python/gecko.py @@ -13,6 +13,7 @@ import os import sys import json import argparse +from functools import reduce from dataclasses import dataclass, field from typing import List, Dict, Optional, NamedTuple, Set, Tuple, Any @@ -39,6 +40,10 @@ CATEGORIES = None # The product name is used by the profiler UI to show the Operating system and Processor. PRODUCT = os.popen('uname -op').read().strip() +# The category index is used by the profiler UI to show the color of the flame graph. +USER_CATEGORY_INDEX = 0 +KERNEL_CATEGORY_INDEX = 1 + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 class Frame(NamedTuple): string_id: StringID @@ -99,6 +104,55 @@ class Thread: stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict) frameMap: Dict[str, int] = field(default_factory=dict) + def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int: + """Gets a matching stack, or saves the new stack.
Returns a Stack ID.""" + key = f"{frame_id}" if prefix_id is None else f"{frame_id},{prefix_id}" + # key = (prefix_id, frame_id) + stack_id = self.stackMap.get(key) + if stack_id is None: + # return stack_id + stack_id = len(self.stackTable) + self.stackTable.append(Stack(prefix_id=prefix_id, frame_id=frame_id)) + self.stackMap[key] = stack_id + return stack_id + + def _intern_string(self, string: str) -> int: + """Gets a matching string, or saves the new string. Returns a String ID.""" + string_id = self.stringMap.get(string) + if string_id is not None: + return string_id + string_id = len(self.stringTable) + self.stringTable.append(string) + self.stringMap[string] = string_id + return string_id + + def _intern_frame(self, frame_str: str) -> int: + """Gets a matching stack frame, or saves the new frame. Returns a Frame ID.""" + frame_id = self.frameMap.get(frame_str) + if frame_id is not None: + return frame_id + frame_id = len(self.frameTable) + self.frameMap[frame_str] = frame_id + string_id = self._intern_string(frame_str) + + symbol_name_to_category = KERNEL_CATEGORY_INDEX if frame_str.find('kallsyms') != -1 \ + or frame_str.find('/vmlinux') != -1 \ + or frame_str.endswith('.ko)') \ + else USER_CATEGORY_INDEX + + self.frameTable.append(Frame( + string_id=string_id, + relevantForJS=False, + innerWindowID=0, + implementation=None, + optimizations=None, + line=None, + column=None, + category=symbol_name_to_category, + subcategory=None, + )) + return frame_id + def _to_json_dict(self) -> Dict: """Converts current Thread to GeckoThread JSON format.""" # Gecko profile format is row-oriented data as List[List], -- cgit v1.2.3-70-g09d2 From 2d889c6af1cc125380b03b6efdaed0a4b4611aed Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Fri, 21 Jul 2023 23:26:24 +0530 Subject: perf scripts python: Implement add sample function and thread processing The stack has been created for storing func and dso from the callchain. The sample has been added to a specific thread. 
It first checks if the thread exists in the Thread class. Then it call _add_sample function which is responsible for appending a new entry to the samples list. Also callchain parsing and storing part is implemented. Moreover removed the comment from thread. Signed-off-by: Anup Sharma Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/5a112be85ccdcdcd611e343f6a7a7482d01f6299.1689961706.git.anupnewsmail@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/gecko.py | 52 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py index 97949249a3c0..278c3aed282a 100644 --- a/tools/perf/scripts/python/gecko.py +++ b/tools/perf/scripts/python/gecko.py @@ -40,6 +40,9 @@ CATEGORIES = None # The product name is used by the profiler UI to show the Operating system and Processor. PRODUCT = os.popen('uname -op').read().strip() +# Here key = tid, value = Thread +tid_to_thread = dict() + # The category index is used by the profiler UI to show the color of the flame graph. USER_CATEGORY_INDEX = 0 KERNEL_CATEGORY_INDEX = 1 @@ -153,6 +156,25 @@ class Thread: )) return frame_id + def _add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None: + """Add a timestamped stack trace sample to the thread builder. + Args: + comm: command-line (name) of the thread at this sample + stack: sampled stack frames. Root first, leaf last. + time_ms: timestamp of sample in milliseconds. + """ + # Ihreads may not set their names right after they are created. + # Instead, they might do it later. In such situations, to use the latest name they have set. 
+ if self.comm != comm: + self.comm = comm + + prefix_stack_id = reduce(lambda prefix_id, frame: self._intern_stack + (self._intern_frame(frame), prefix_id), stack, None) + if prefix_stack_id is not None: + self.samples.append(Sample(stack_id=prefix_stack_id, + time_ms=time_ms, + responsiveness=0)) + def _to_json_dict(self) -> Dict: """Converts current Thread to GeckoThread JSON format.""" # Gecko profile format is row-oriented data as List[List], @@ -231,9 +253,36 @@ def process_event(param_dict: Dict) -> None: if not start_time: start_time = time_stamp + # Parse and append the callchain of the current sample into a stack. + stack = [] + if param_dict['callchain']: + for call in param_dict['callchain']: + if 'sym' not in call: + continue + stack.append(f'{call["sym"]["name"]} (in {call["dso"]})') + if len(stack) != 0: + # Reverse the stack, as root come first and the leaf at the end. + stack = stack[::-1] + + # During perf record if -g is not used, the callchain is not available. + # In that case, the symbol and dso are available in the event parameters. + else: + func = param_dict['symbol'] if 'symbol' in param_dict else '[unknown]' + dso = param_dict['dso'] if 'dso' in param_dict else '[unknown]' + stack.append(f'{func} (in {dso})') + + # Add sample to the specific thread. + thread = tid_to_thread.get(tid) + if thread is None: + thread = Thread(comm=comm, pid=pid, tid=tid) + tid_to_thread[tid] = thread + thread._add_sample(comm=comm, stack=stack, time_ms=time_stamp) + # Trace_end runs at the end and will be used to aggregate # the data into the final json object and print it out to stdout. 
def trace_end() -> None: + threads = [thread._to_json_dict() for thread in tid_to_thread.values()] + # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305 gecko_profile_with_meta = { "meta": { @@ -252,8 +301,7 @@ def trace_end() -> None: "markerSchema": [], }, "libs": [], - # threads will be implemented in later commits. - # "threads": threads, + "threads": threads, "processes": [], "pausedRanges": [], } -- cgit v1.2.3-70-g09d2 From f9f72b2ab77e986ac30de09a735a002b37d81503 Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Fri, 21 Jul 2023 23:27:46 +0530 Subject: perf scripts python: Add command execution for gecko script This will enable the execution of gecko.py script using record and report commands in 'perf script'. And this will be also reflected at "perf script -l" command. For Example: perf script record gecko perf script report gecko Committer notes: As discussed on the perf tools office hours, I made -F 99 the default for the record script and removed the double -- on the report script so that the existing 'perf script' protocol for the combined operation: # perf script gecko Works, i.e. the record script pipes its stdout into the stdin of the report script, basically: /bin/sh /usr/libexec/perf-core/scripts/python/bin/gecko-record -F 99 -g -a -q -o - | \ /bin/sh /usr/libexec/perf-core/scripts/python/bin/gecko-report -i - Testing it: The resulting JSON file needs to be uploaded to https://profiler.firefox.com, Anup already has code to start a local http server on the trace_begin handler of the gecko python script, start firefox and feed it the JSON. 
The example below only collects sample for the specified workload, so that we don't produce thousands of lines, to collect system wide samples, use instead: # perf script gecko -a sleep 0.5 # nohup perf script gecko sleep 0.5 { "meta": { "interval": 1, "processType": 0, "product": "x86_64 GNU/Linux", "stackwalk": 1, "debug": 0, "gcpoison": 0, "asyncstack": 1, "startTime": 274601692.636, "shutdownTime": null, "version": 24, "presymbolicated": true, "categories": [ { "name": "User", "color": "yellow", "subcategories": [ "Other" ] }, { "name": "Kernel", "color": "orange", "subcategories": [ "Other" ] } ], "markerSchema": [] }, "libs": [], "threads": [ { "tid": 3344498, "pid": 3344498, "name": "sleep", "markers": { "schema": { "name": 0, "startTime": 1, "endTime": 2, "phase": 3, "category": 4, "data": 5 }, "data": [] }, "samples": { "schema": { "stack": 0, "time": 1, "responsiveness": 2 }, "data": [ [ 21, 274601692.636, 0 ], [ 23, 274601692.641, 0 ], [ 29, 274601692.643, 0 ], [ 42, 274601692.648, 0 ] ] }, "frameTable": { "schema": { "location": 0, "relevantForJS": 1, "innerWindowID": 2, "implementation": 3, "optimizations": 4, "line": 5, "column": 6, "category": 7, "subcategory": 8 }, "data": [ [ 0, false, 0, null, null, null, null, 1, null ], [ 1, false, 0, null, null, null, null, 1, null ], [ 2, false, 0, null, null, null, null, 1, null ], [ 3, false, 0, null, null, null, null, 1, null ], [ 4, false, 0, null, null, null, null, 1, null ], [ 5, false, 0, null, null, null, null, 1, null ], [ 6, false, 0, null, null, null, null, 1, null ], [ 7, false, 0, null, null, null, null, 1, null ], [ 8, false, 0, null, null, null, null, 1, null ], [ 9, false, 0, null, null, null, null, 1, null ], [ 10, false, 0, null, null, null, null, 1, null ], [ 11, false, 0, null, null, null, null, 1, null ], [ 12, false, 0, null, null, null, null, 1, null ], [ 13, false, 0, null, null, null, null, 1, null ], [ 14, false, 0, null, null, null, null, 1, null ], [ 15, false, 0, null, null, null, 
null, 1, null ], [ 16, false, 0, null, null, null, null, 1, null ], [ 17, false, 0, null, null, null, null, 1, null ], [ 18, false, 0, null, null, null, null, 1, null ], [ 19, false, 0, null, null, null, null, 1, null ], [ 20, false, 0, null, null, null, null, 1, null ], [ 21, false, 0, null, null, null, null, 1, null ], [ 22, false, 0, null, null, null, null, 1, null ], [ 23, false, 0, null, null, null, null, 1, null ], [ 24, false, 0, null, null, null, null, 1, null ], [ 25, false, 0, null, null, null, null, 1, null ], [ 26, false, 0, null, null, null, null, 1, null ], [ 27, false, 0, null, null, null, null, 1, null ], [ 28, false, 0, null, null, null, null, 1, null ], [ 29, false, 0, null, null, null, null, 1, null ], [ 30, false, 0, null, null, null, null, 1, null ], [ 31, false, 0, null, null, null, null, 1, null ], [ 32, false, 0, null, null, null, null, 1, null ], [ 33, false, 0, null, null, null, null, 1, null ], [ 34, false, 0, null, null, null, null, 1, null ], [ 35, false, 0, null, null, null, null, 1, null ], [ 36, false, 0, null, null, null, null, 1, null ], [ 37, false, 0, null, null, null, null, 1, null ], [ 38, false, 0, null, null, null, null, 1, null ] ] }, "stackTable": { "schema": { "prefix": 0, "frame": 1 }, "data": [ [ null, 0 ], [ 0, 1 ], [ 1, 2 ], [ 2, 3 ], [ 3, 4 ], [ 4, 5 ], [ 5, 6 ], [ 6, 7 ], [ 7, 8 ], [ 8, 9 ], [ 9, 10 ], [ 10, 11 ], [ 11, 12 ], [ 12, 13 ], [ 13, 14 ], [ 14, 15 ], [ 15, 16 ], [ 16, 17 ], [ 17, 18 ], [ 18, 19 ], [ 19, 20 ], [ 20, 21 ], [ 20, 22 ], [ 22, 23 ], [ 11, 24 ], [ 24, 25 ], [ 25, 26 ], [ 26, 27 ], [ 27, 28 ], [ 28, 29 ], [ 9, 11 ], [ 30, 24 ], [ 31, 25 ], [ 32, 30 ], [ 33, 31 ], [ 34, 32 ], [ 35, 29 ], [ 36, 33 ], [ 37, 34 ], [ 38, 35 ], [ 39, 36 ], [ 40, 37 ], [ 41, 38 ] ] }, "stringTable": [ "__func__.0 (in [kernel.kallsyms].rodata)", "perf_trace_ext4_fc_track_inode (in [kernel.kallsyms])", "perf_trace_ext4_es_insert_delayed_block (in [kernel.kallsyms])", "ext4_es_show_pblock (in [kernel.kallsyms])", 
"perf_trace_ext4_ext_rm_leaf (in [kernel.kallsyms])", "devcgroup_access_write (in [kernel.kallsyms])", "devcgroup_update_access (in [kernel.kallsyms])", "propagate_exception (in [kernel.kallsyms])", "revalidate_active_exceptions (in [kernel.kallsyms])", "perf_trace_ext4_fc_commit_stop (in [kernel.kallsyms])", "perf_fetch_caller_regs (in [kernel.kallsyms])", "khugepaged (in [kernel.kallsyms])", "khugepaged_wait_work (in [kernel.kallsyms])", "freezable_schedule_timeout (in [kernel.kallsyms])", "freezer_count (in [kernel.kallsyms])", "try_to_freeze (in [kernel.kallsyms])", "try_to_freeze_unsafe (in [kernel.kallsyms])", "split_huge_pages_write (in [kernel.kallsyms])", "migrate_pages (in [kernel.kallsyms])", "unmap_and_move (in [kernel.kallsyms])", "__unmap_and_move (in [kernel.kallsyms])", "collect_events (in [kernel.kallsyms])", "uncore_down_prepare (in [kernel.kallsyms])", "perf_iommu_read (in [kernel.kallsyms])", "khugepaged_do_scan (in [kernel.kallsyms])", "khugepaged_scan_mm_slot (in [kernel.kallsyms])", "khugepaged_scan_file (in [kernel.kallsyms])", "need_resched (in [kernel.kallsyms])", "get_current (in [kernel.kallsyms])", "move_to_new_page (in [kernel.kallsyms])", "khugepaged_scan_pmd (in [kernel.kallsyms])", "trace_mm_khugepaged_scan_pmd (in [kernel.kallsyms])", "migrate_huge_page_move_mapping (in [kernel.kallsyms])", "do_huge_pmd_numa_page (in [kernel.kallsyms])", "pmd_pfn (in [kernel.kallsyms])", "protnone_mask (in [kernel.kallsyms])", "__pte_needs_invert (in [kernel.kallsyms])", "reclaim_high (in [kernel.kallsyms])", "memcg_memory_event (in [kernel.kallsyms])" ], "registerTime": 0, "unregisterTime": null, "processType": "default" } ], "processes": [], "pausedRanges": [] } # Signed-off-by: Anup Sharma Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: 
https://lore.kernel.org/r/cbf03cda175ea3dd2c6cd87bd3f12d803446cb95.1689961706.git.anupnewsmail@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/bin/gecko-record | 2 ++ tools/perf/scripts/python/bin/gecko-report | 3 +++ 2 files changed, 5 insertions(+) create mode 100644 tools/perf/scripts/python/bin/gecko-record create mode 100644 tools/perf/scripts/python/bin/gecko-report diff --git a/tools/perf/scripts/python/bin/gecko-record b/tools/perf/scripts/python/bin/gecko-record new file mode 100644 index 000000000000..f0d1aa55f171 --- /dev/null +++ b/tools/perf/scripts/python/bin/gecko-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -F 99 -g "$@" diff --git a/tools/perf/scripts/python/bin/gecko-report b/tools/perf/scripts/python/bin/gecko-report new file mode 100644 index 000000000000..0c12cc08f3ab --- /dev/null +++ b/tools/perf/scripts/python/bin/gecko-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: create firefox gecko profile json format from perf.data +perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/gecko.py -- cgit v1.2.3-70-g09d2 From 404e077a16bb7796908b604b2df02cd650c965aa Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 28 Jun 2023 11:53:02 +0100 Subject: perf tools: Add a place to put kernel config fragments for test runs Defconfig doesn't give full coverage for a perf test run, so these can be merged with defconfig to do so. It's not complete yet, but is a starting point as a place to add to when a specific test needs something extra to run. 
Signed-off-by: James Clark Cc: Adrian Hunter Cc: Aishwarya.TCV@arm.com Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Brown Cc: Mark Rutland Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230628105303.4053478-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/config-fragments/README | 7 +++++++ tools/perf/tests/config-fragments/arm64 | 1 + tools/perf/tests/config-fragments/config | 11 +++++++++++ 3 files changed, 19 insertions(+) create mode 100644 tools/perf/tests/config-fragments/README create mode 100644 tools/perf/tests/config-fragments/arm64 create mode 100644 tools/perf/tests/config-fragments/config diff --git a/tools/perf/tests/config-fragments/README b/tools/perf/tests/config-fragments/README new file mode 100644 index 000000000000..fe7de5d93674 --- /dev/null +++ b/tools/perf/tests/config-fragments/README @@ -0,0 +1,7 @@ +This folder is for kernel config fragments that can be merged with +defconfig to give full test coverage of a perf test run. This is only +an optimistic set as some features require hardware support in order to +pass and not skip. + +'config' is shared across all platforms, and for arch specific files, +the file name should match that used in the ARCH=... make option. 
diff --git a/tools/perf/tests/config-fragments/arm64 b/tools/perf/tests/config-fragments/arm64 new file mode 100644 index 000000000000..64c4ab17cd58 --- /dev/null +++ b/tools/perf/tests/config-fragments/arm64 @@ -0,0 +1 @@ +CONFIG_CORESIGHT_SOURCE_ETM4X=y diff --git a/tools/perf/tests/config-fragments/config b/tools/perf/tests/config-fragments/config new file mode 100644 index 000000000000..c340b3195fca --- /dev/null +++ b/tools/perf/tests/config-fragments/config @@ -0,0 +1,11 @@ +CONFIG_TRACEPOINTS=y +CONFIG_STACKTRACE=y +CONFIG_NOP_TRACER=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_TRACING=y +CONFIG_GENERIC_TRACER=y +CONFIG_FTRACE=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_BRANCH_PROFILE_NONE=y -- cgit v1.2.3-70-g09d2 From 24069d8112c94e76758da83a5bddc9aa98601d22 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 28 Jul 2023 15:09:33 +0800 Subject: perf jevents: Add support for Yitian 710 DDR PMU (arm64) aliasing Add alias support for T-HEAD Yitian 710 SoC DDR PMU events. Reviewed-by: John Garry Reviewed-by: Shuai Xue Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Namhyung Kim Cc: Will Deacon Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/1690528175-2499-2-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/freescale/yitian710/sys/ali_drw.json | 373 +++++++++++++++++++++ tools/perf/pmu-events/jevents.py | 1 + 2 files changed, 374 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json new file mode 100644 index 000000000000..e21c469a8ef0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json @@ -0,0 +1,373 @@ +[ + { + "BriefDescription": "A Write or Read Op at HIF interface. 
The unit is 64B.", + "ConfigCode": "0x0", + "EventName": "hif_rd_or_wr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Write Op at HIF interface. The unit is 64B.", + "ConfigCode": "0x1", + "EventName": "hif_wr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Read Op at HIF interface. The unit is 64B.", + "ConfigCode": "0x2", + "EventName": "hif_rd", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Read-Modify-Write Op at HIF interface. The unit is 64B.", + "ConfigCode": "0x3", + "EventName": "hif_rmw", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A high priority Read at HIF interface. The unit is 64B.", + "ConfigCode": "0x4", + "EventName": "hif_hi_pri_rd", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A write data cycle at DFI interface (to DRAM).", + "ConfigCode": "0x7", + "EventName": "dfi_wr_data_cycles", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A read data cycle at DFI interface (to DRAM).", + "ConfigCode": "0x8", + "EventName": "dfi_rd_data_cycles", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A high priority read becomes critical.", + "ConfigCode": "0x9", + "EventName": "hpr_xact_when_critical", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A low priority read becomes critical.", + "ConfigCode": "0xA", + "EventName": "lpr_xact_when_critical", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A write becomes critical.", + "ConfigCode": "0xB", + "EventName": "wr_xact_when_critical", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "An Activate(ACT) command to DRAM.", + "ConfigCode": "0xC", + "EventName": "op_is_activate", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Read or Write CAS command to 
DRAM.", + "ConfigCode": "0xD", + "EventName": "op_is_rd_or_wr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "An Activate(ACT) command for read to DRAM.", + "ConfigCode": "0xE", + "EventName": "op_is_rd_activate", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Read CAS command to DRAM.", + "ConfigCode": "0xF", + "EventName": "op_is_rd", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Write CAS command to DRAM.", + "ConfigCode": "0x10", + "EventName": "op_is_wr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Masked Write command to DRAM.", + "ConfigCode": "0x11", + "EventName": "op_is_mwr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Precharge(PRE) command to DRAM.", + "ConfigCode": "0x12", + "EventName": "op_is_precharge", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Precharge(PRE) required by read or write.", + "ConfigCode": "0x13", + "EventName": "precharge_for_rdwr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Precharge(PRE) required by other conditions.", + "ConfigCode": "0x14", + "EventName": "precharge_for_other", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A read-write turnaround.", + "ConfigCode": "0x15", + "EventName": "rdwr_transitions", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A write combine(merge) in write data buffer.", + "ConfigCode": "0x16", + "EventName": "write_combine", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Write-After-Read hazard.", + "ConfigCode": "0x17", + "EventName": "war_hazard", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Read-After-Write hazard.", + "ConfigCode": "0x18", + "EventName": "raw_hazard", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + 
}, + { + "BriefDescription": "A Write-After-Write hazard.", + "ConfigCode": "0x19", + "EventName": "waw_hazard", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank0 enters self-refresh(SRE).", + "ConfigCode": "0x1A", + "EventName": "op_is_enter_selfref_rk0", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank1 enters self-refresh(SRE).", + "ConfigCode": "0x1B", + "EventName": "op_is_enter_selfref_rk1", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank2 enters self-refresh(SRE).", + "ConfigCode": "0x1C", + "EventName": "op_is_enter_selfref_rk2", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank3 enters self-refresh(SRE).", + "ConfigCode": "0x1D", + "EventName": "op_is_enter_selfref_rk3", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank0 enters power-down(PDE).", + "ConfigCode": "0x1E", + "EventName": "op_is_enter_powerdown_rk0", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank1 enters power-down(PDE).", + "ConfigCode": "0x1F", + "EventName": "op_is_enter_powerdown_rk1", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank2 enters power-down(PDE).", + "ConfigCode": "0x20", + "EventName": "op_is_enter_powerdown_rk2", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "Rank3 enters power-down(PDE).", + "ConfigCode": "0x21", + "EventName": "op_is_enter_powerdown_rk3", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A cycle that Rank0 stays in self-refresh mode.", + "ConfigCode": "0x26", + "EventName": "selfref_mode_rk0", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A cycle that Rank1 stays in self-refresh mode.", + "ConfigCode": "0x27", + "EventName": "selfref_mode_rk1", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A 
cycle that Rank2 stays in self-refresh mode.", + "ConfigCode": "0x28", + "EventName": "selfref_mode_rk2", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A cycle that Rank3 stays in self-refresh mode.", + "ConfigCode": "0x29", + "EventName": "selfref_mode_rk3", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "An auto-refresh(REF) command to DRAM.", + "ConfigCode": "0x2A", + "EventName": "op_is_refresh", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A critical auto-refresh(REF) command to DRAM.", + "ConfigCode": "0x2B", + "EventName": "op_is_crit_ref", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "An MRR or MRW command to DRAM.", + "ConfigCode": "0x2D", + "EventName": "op_is_load_mode", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A ZQCal command to DRAM.", + "ConfigCode": "0x2E", + "EventName": "op_is_zqcl", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "At least one entry in read queue reaches the visible window limit.", + "ConfigCode": "0x30", + "EventName": "visible_window_limit_reached_rd", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "At least one entry in write queue reaches the visible window limit.", + "ConfigCode": "0x31", + "EventName": "visible_window_limit_reached_wr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A DQS Oscillator MPC command to DRAM.", + "ConfigCode": "0x34", + "EventName": "op_is_dqsosc_mpc", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A DQS Oscillator MRR command to DRAM.", + "ConfigCode": "0x35", + "EventName": "op_is_dqsosc_mrr", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A Temperature Compensated Refresh(TCR) MRR command to DRAM.", + "ConfigCode": "0x36", + "EventName": "op_is_tcr_mrr", + "Unit": "ali_drw", 
+ "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A ZQCal Start command to DRAM.", + "ConfigCode": "0x37", + "EventName": "op_is_zqstart", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A ZQCal Latch command to DRAM.", + "ConfigCode": "0x38", + "EventName": "op_is_zqlatch", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A packet at CHI TXREQ interface (request).", + "ConfigCode": "0x39", + "EventName": "chi_txreq", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A packet at CHI TXDAT interface (read data).", + "ConfigCode": "0x3A", + "EventName": "chi_txdat", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A packet at CHI RXDAT interface (write data).", + "ConfigCode": "0x3B", + "EventName": "chi_rxdat", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A packet at CHI RXRSP interface.", + "ConfigCode": "0x3C", + "EventName": "chi_rxrsp", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "A violation detected in TZC.", + "ConfigCode": "0x3D", + "EventName": "tsz_vio", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "BriefDescription": "The ddr cycles.", + "ConfigCode": "0x80", + "EventName": "ddr_cycles", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + } +] diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 12e80bb7939b..08ec9aa583e7 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -274,6 +274,7 @@ class JsonEvent: 'DFPMC': 'amd_df', 'cpu_core': 'cpu_core', 'cpu_atom': 'cpu_atom', + 'ali_drw': 'ali_drw', } return table[unit] if unit in table else f'uncore_{unit.lower()}' -- cgit v1.2.3-70-g09d2 From 3e65bd13746af54bbe9a9c415a437bcc0b78282c Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 28 Jul 2023 15:09:34 +0800 Subject: perf vendor events arm64: Add JSON metrics for Yitian 710 DDR Add 
JSON metrics for T-HEAD Yitian 710 SoC DDR. Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Namhyung Kim Cc: Shuai Xue Cc: Will Deacon Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/1690528175-2499-3-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/freescale/yitian710/sys/metrics.json | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json new file mode 100644 index 000000000000..bc865b374b6a --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json @@ -0,0 +1,20 @@ +[ + { + "MetricName": "ddr_read_bandwidth.all", + "BriefDescription": "The ddr read bandwidth(MB/s).", + "MetricGroup": "ali_drw", + "MetricExpr": "hif_rd * 64 / 1e6 / duration_time", + "ScaleUnit": "1MB/s", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + }, + { + "MetricName": "ddr_write_bandwidth.all", + "BriefDescription": "The ddr write bandwidth(MB/s).", + "MetricGroup": "ali_drw", + "MetricExpr": "(hif_wr + hif_rmw) * 64 / 1e6 / duration_time", + "ScaleUnit": "1MB/s", + "Unit": "ali_drw", + "Compat": "ali_drw_pmu" + } +] -- cgit v1.2.3-70-g09d2 From f849ce6baf467ea7fb9e68ae05d5d045f3aa89e8 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Fri, 28 Jul 2023 15:09:35 +0800 Subject: perf docs: Update metric usage for Alibaba's T-Head PMU driver (arm64) Alibaba's T-Head ali_drw PMU supports DDR bandwidth metrics. Update its usage in the documentation. 
Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Namhyung Kim Cc: Shuai Xue Cc: Will Deacon Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/1690528175-2499-4-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/admin-guide/perf/alibaba_pmu.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/perf/alibaba_pmu.rst b/Documentation/admin-guide/perf/alibaba_pmu.rst index 11de998bb480..7d840023903f 100644 --- a/Documentation/admin-guide/perf/alibaba_pmu.rst +++ b/Documentation/admin-guide/perf/alibaba_pmu.rst @@ -88,6 +88,11 @@ data bandwidth:: -e ali_drw_27080/hif_rmw/ \ -e ali_drw_27080/cycle/ -- sleep 10 +Example usage of counting all memory read/write bandwidth by metric:: + + perf stat -M ddr_read_bandwidth.all -- sleep 10 + perf stat -M ddr_write_bandwidth.all -- sleep 10 + The average DRAM bandwidth can be calculated as follows: - Read Bandwidth = perf_hif_rd * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle -- cgit v1.2.3-70-g09d2 From 1134f290d07c251969d731781863dc513d81d049 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 23:49:12 -0700 Subject: perf bpf-loader: Remove unneeded diagnostic pragma Added during the progress to libbpf 1.0 the deprecated functions are no longer used and so the pragma can be removed. 
Signed-off-by: Ian Rogers Acked-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Gaosheng Cui Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rob Herring Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230728064917.767761-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 44cde27d6389..8f4c76f2265a 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -32,9 +32,6 @@ #include -/* temporarily disable libbpf deprecation warnings */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), const char *fmt, va_list args) { -- cgit v1.2.3-70-g09d2 From 435bea0a45cb309772b22c56b2d570f743f655e0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 23:49:13 -0700 Subject: perf build: Don't always set -funwind-tables and -ggdb3 Commit 6a40cd90f5deb6de ("perf tools: Add libunwind dependency for DWARF CFI unwinding") added libunwind support but also -funwind-tables and -ggdb3 to the standard build. These build flags aren't necessary so remove, set -g when DEBUG is enabled for the build. 
Signed-off-by: Ian Rogers Acked-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Gaosheng Cui Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rob Herring Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230728064917.767761-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a9cfe83638a9..14709a6bd622 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -246,6 +246,9 @@ ifeq ($(CC_NO_CLANG), 0) else CORE_CFLAGS += -O6 endif +else + CORE_CFLAGS += -g + CXXFLAGS += -g endif ifdef PARSER_DEBUG @@ -324,8 +327,6 @@ FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl CORE_CFLAGS += -fno-omit-frame-pointer -CORE_CFLAGS += -ggdb3 -CORE_CFLAGS += -funwind-tables CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra CORE_CFLAGS += -std=gnu11 @@ -333,8 +334,6 @@ CORE_CFLAGS += -std=gnu11 CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti CXXFLAGS += -Wall CXXFLAGS += -fno-omit-frame-pointer -CXXFLAGS += -ggdb3 -CXXFLAGS += -funwind-tables CXXFLAGS += -Wno-strict-aliasing HOSTCFLAGS += -Wall -- cgit v1.2.3-70-g09d2 From e5764ae4c9714e58fa3c38a7e4900d4379dc2263 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 23:49:14 -0700 Subject: perf build: Add Wextra for C++ compilation Commit d58ac0bf8d1e ("perf build: Add clang and llvm compile and linking support") added -Wall and -Wno-strict-aliasing for CXXFLAGS, but not -Wextra. -Wno-strict-aliasing is no longer necessary, adding -Wextra for CXXFLAGS requires adding -Wno-unused-parameter clang.cpp and clang-test.cpp for LIBCLANGLLVM=1 to build. 
Signed-off-by: Ian Rogers Acked-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Gaosheng Cui Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rob Herring Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230728064917.767761-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/util/c++/Build | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 14709a6bd622..fe7afe6d8529 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -333,8 +333,8 @@ CORE_CFLAGS += -std=gnu11 CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti CXXFLAGS += -Wall +CXXFLAGS += -Wextra CXXFLAGS += -fno-omit-frame-pointer -CXXFLAGS += -Wno-strict-aliasing HOSTCFLAGS += -Wall HOSTCFLAGS += -Wextra diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build index 613ecfd76527..8610d032ac19 100644 --- a/tools/perf/util/c++/Build +++ b/tools/perf/util/c++/Build @@ -1,2 +1,5 @@ perf-$(CONFIG_CLANGLLVM) += clang.o perf-$(CONFIG_CLANGLLVM) += clang-test.o + +CXXFLAGS_clang.o += -Wno-unused-parameter +CXXFLAGS_clang-test.o += -Wno-unused-parameter -- cgit v1.2.3-70-g09d2 From a9b451509565d40a5ca3b41c39a2b758cdbc5355 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2023 16:19:21 -0300 Subject: tools build: Add 3-component logical version comparators The next cset needs to compare if a flex version is greater or equal/less than another, but since there is no canonical, generally available way to compare versions in the command line (sort -V, yeah, but...), just use awk to canonicalize the versions like is also done in scripts/rust_is_available.sh. 
There was a problem spotted in linux-next where a bashism, here strings, aka the '<<<' stdin redirector, was used to pass strings as the stdin for awk. Use $(shell echo | awk ...) instead. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/scripts/utilities.mak | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/scripts/utilities.mak b/tools/scripts/utilities.mak index 172e47273b5d..d69d0345cc23 100644 --- a/tools/scripts/utilities.mak +++ b/tools/scripts/utilities.mak @@ -177,3 +177,23 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef _ge_attempt = $(or $(get-executable),$(call _gea_err,$(2))) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) + +# version-ge3 +# +# Usage $(call version-ge3,2.6.4,$(FLEX_VERSION)) +# +# To compare if a 3 component version is greater or equal to another, first use +# was to check the flex version to see if we can use compiler warnings as +# errors for one of the cases flex generates code C compilers complain about. + +version-ge3 = $(shell echo "$(1).$(2)" | awk -F'.' '{ printf("%d\n", (10000000 * $$1 + 10000 * $$2 + $$3) >= (10000000 * $$4 + 10000 * $$5 + $$6)) }') + +# version-lt3 +# +# Usage $(call version-lt3,2.6.2,$(FLEX_VERSION)) +# +# To compare if a 3 component version is less than another, first use was to +# check the flex version to see if we can use compiler warnings as errors for +# one of the cases flex generates code C compilers complain about. + +version-lt3 = $(shell echo "$(1).$(2)" | awk -F'.' '{ printf("%d\n", (10000000 * $$1 + 10000 * $$2 + $$3) < (10000000 * $$4 + 10000 * $$5 + $$6)) }') -- cgit v1.2.3-70-g09d2 From 10c775afa5992d55be76fa40a6373a93751ba6b4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 23:49:15 -0700 Subject: perf build: Disable fewer flex warnings If flex is version 2.6.4, reduce the number of flex C warnings disabled.
Earlier flex versions have all C warnings disabled. Committer notes: Added this to the list of ignored warnings to get it building on a Fedora 36 machine with flex 2.6.4: -Wno-misleading-indentation Noticed when building with: $ make LLVM=1 -C tools/perf NO_BPF_SKEL=1 DEBUG=1 Take two: We can't just try to canonicalize flex versions by just removing the dots, as we end up with: 2.6.4 >= 2.5.37 becoming: 264 >= 2537 Failing the build on flex 2.5.37, so instead use the $(call version-ge3,$(FLEX_VERSION),2.6.4) variant, added in the previous patch, to check for that. Making sure $(FLEX_VERSION) keeps the dots as we may want to use 'sort -V' or something nicer when available everywhere. Some other tweaks for other flex versions and combinations with gcc and clang versions were added, see the notes in the patch. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Gaosheng Cui Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rob Herring Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230728064917.767761-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index bb08149179e4..15c69decb76b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,3 +1,5 @@ +include $(srctree)/tools/scripts/utilities.mak + perf-y += arm64-frame-pointer-unwind-support.o perf-y += addr_location.o perf-y += annotate.o @@ -279,16 +281,26 @@ $(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filt $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \ -o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_ -FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 
's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26) -ifeq ($(FLEX_GE_26),1) - flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations - CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?) - ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1) - flex_flags += -Wno-misleading-indentation +FLEX_VERSION := $(shell $(FLEX) --version | cut -d' ' -f2) + +FLEX_GE_260 := $(call version-ge3,$(FLEX_VERSION),2.6.0) +ifeq ($(FLEX_GE_260),1) + flex_flags := -Wno-redundant-decls -Wno-switch-default -Wno-unused-function -Wno-misleading-indentation + + # Some newer clang and gcc version complain about this + # util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable] + # int yynerrs = 0; + + flex_flags += -Wno-unused-but-set-variable + + FLEX_LT_262 := $(call version-lt3,$(FLEX_VERSION),2.6.2) + ifeq ($(FLEX_LT_262),1) + flex_flags += -Wno-sign-compare endif else flex_flags := -w endif + CFLAGS_parse-events-flex.o += $(flex_flags) CFLAGS_pmu-flex.o += $(flex_flags) CFLAGS_expr-flex.o += $(flex_flags) -- cgit v1.2.3-70-g09d2 From ddc8e4c966923ad1137790817157c8a5f0301aec Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 23:49:16 -0700 Subject: perf build: Disable fewer bison warnings If bison is version 3.8.2, reduce the number of bison C warnings disabled. Earlier bison versions have all C warnings disabled. Avoid implicit declarations of yylex by adding the declaration in the C file. A header can't be included as a circular dependency would occur due to the lexer using the bison defined tokens. Committer notes: Some recent versions of gcc and clang (noticed on Alpine Linux 3.17, edge, clearlinux, fedora 37, etc. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Gaosheng Cui Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rob Herring Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230728064917.767761-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 12 ++++++++---- tools/perf/util/bpf-filter.y | 2 ++ tools/perf/util/expr.y | 4 +++- tools/perf/util/parse-events.y | 1 + tools/perf/util/pmu.y | 3 +++ 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 15c69decb76b..1acfbbd1f39a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -306,10 +306,14 @@ CFLAGS_pmu-flex.o += $(flex_flags) CFLAGS_expr-flex.o += $(flex_flags) CFLAGS_bpf-filter-flex.o += $(flex_flags) -bison_flags := -DYYENABLE_NLS=0 -BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35) -ifeq ($(BISON_GE_35),1) - bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option +# Some newer clang and gcc version complain about this +# util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable] +# int yynerrs = 0; + +bison_flags := -DYYENABLE_NLS=0 -Wno-unused-but-set-variable +BISON_GE_382 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 382) +ifeq ($(BISON_GE_382),1) + bison_flags += -Wno-switch-enum else bison_flags += -w endif diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y index 07d6c7926c13..5dfa948fc986 100644 --- a/tools/perf/util/bpf-filter.y +++ 
b/tools/perf/util/bpf-filter.y @@ -9,6 +9,8 @@ #include #include "bpf-filter.h" +int perf_bpf_filter_lex(void); + static void perf_bpf_filter_error(struct list_head *expr __maybe_unused, char const *msg) { diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index dd504afd8f36..65d54a6f29ad 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -7,6 +7,8 @@ #include "util/debug.h" #define IN_EXPR_Y 1 #include "expr.h" +#include "expr-bison.h" +int expr_lex(YYSTYPE * yylval_param , void *yyscanner); %} %define api.pure full @@ -56,7 +58,7 @@ static void expr_error(double *final_val __maybe_unused, struct expr_parse_ctx *ctx __maybe_unused, bool compute_ids __maybe_unused, - void *scanner, + void *scanner __maybe_unused, const char *s) { pr_debug("%s\n", s); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 454577f7aff6..251b7d2fde32 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -20,6 +20,7 @@ #include "parse-events.h" #include "parse-events-bison.h" +int parse_events_lex(YYSTYPE * yylval_param, YYLTYPE * yylloc_param , void *yyscanner); void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg); #define PE_ABORT(val) \ diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index dff4e892ac4d..3d46cca3bb94 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -11,6 +11,9 @@ #include #include #include "pmu.h" +#include "pmu-bison.h" + +int perf_pmu_lex(YYSTYPE * yylval_param , void *yyscanner); #define ABORT_ON(val) \ do { \ -- cgit v1.2.3-70-g09d2 From f776b0435e8cf6e73359d4203c2bc7bf2cf4b2af Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 23:49:17 -0700 Subject: perf build: Remove -Wno-redundant-decls in 2 cases Properly fix a warning and remove the -Wno-redundant-decls C flag. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Gaosheng Cui Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Rob Herring Cc: Tom Rix Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230728064917.767761-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 2 -- tools/perf/util/parse-events.c | 1 - 2 files changed, 3 deletions(-) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 1acfbbd1f39a..d487aec0b458 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -338,8 +338,6 @@ CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_parse-events.o += -Wno-redundant-decls -CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 926d3ac97324..ac315e1be2bc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -35,7 +35,6 @@ #ifdef PARSER_DEBUG extern int parse_events_debug; #endif -int parse_events_parse(void *parse_state, void *scanner); static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused); -- cgit v1.2.3-70-g09d2 From 7822a8913f4c51c7d1aff793b525d60c3384fb5b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 27 Jul 2023 19:24:46 -0700 Subject: perf build: Update build rule for generated files The bison and flex generate C files from the source (.y and .l) 
files. When O= option is used, they are saved in a separate directory but the default build rule assumes the .c files are in the source directory. So it might read an invalid file if there are generated files from an old version. The same is true for the pmu-events files. For example, the following commands would cause a build failure: $ git checkout v6.3 $ make -C tools/perf # build in the same directory $ git checkout v6.5-rc2 $ mkdir build # create a build directory $ make -C tools/perf O=build # build in a different directory but it # refers files in the source directory Let's update the build rule to specify those cases explicitly to depend on the files in the output directory. Note that it's not a complete fix and it needs the next patch for the include path too. Fixes: 80eeb67fe577aa76 ("perf jevents: Program to convert JSON file") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Anup Sharma Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230728022447.1323563-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.build | 10 ++++++++++ tools/perf/pmu-events/Build | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 89430338a3d9..fac42486a8cf 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -117,6 +117,16 @@ $(OUTPUT)%.s: %.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_s_c) +# bison and flex files are generated in the OUTPUT directory +# so it needs a separate rule to depend on them properly +$(OUTPUT)%-bison.o: $(OUTPUT)%-bison.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,$(host)cc_o_c) + +$(OUTPUT)%-flex.o: $(OUTPUT)%-flex.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,$(host)cc_o_c) + # Gather build data: # obj-y - list of build objects # subdir-y - list of directories to nest diff --git a/tools/perf/pmu-events/Build 
b/tools/perf/pmu-events/Build index 150765f2baee..1d18bb89402e 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -35,3 +35,9 @@ $(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_L $(call rule_mkdir) $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@ endif + +# pmu-events.c file is generated in the OUTPUT directory so it needs a +# separate rule to depend on it properly +$(OUTPUT)pmu-events/pmu-events.o: $(PMU_EVENTS_C) + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) -- cgit v1.2.3-70-g09d2 From c7e97f215a4ad634b746804679f5937d25f77e29 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 27 Jul 2023 19:24:47 -0700 Subject: perf build: Include generated header files properly The flex and bison generate header files from the source. When user specified a build directory with O= option, it'd generate files under the directory. The build command has -I option to specify the header include directory. But the -I option only affects the files included like <...>. Let's change the flex and bison headers to use it instead of "...". 
Fixes: 80eeb67fe577aa76 ("perf jevents: Program to convert JSON file") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Anup Sharma Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230728022447.1323563-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.py | 2 +- tools/perf/util/bpf-filter.c | 4 ++-- tools/perf/util/expr.c | 4 ++-- tools/perf/util/parse-events.c | 4 ++-- tools/perf/util/pmu.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 08ec9aa583e7..8cd561aa606a 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -1000,7 +1000,7 @@ such as "arm/cortex-a34".''', _args = ap.parse_args() _args.output_file.write(""" -#include "pmu-events/pmu-events.h" +#include #include "util/header.h" #include "util/pmu.h" #include diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index 0b30688d78a7..47f01df658d9 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -9,8 +9,8 @@ #include "util/evsel.h" #include "util/bpf-filter.h" -#include "util/bpf-filter-flex.h" -#include "util/bpf-filter-bison.h" +#include +#include #include "bpf_skel/sample-filter.h" #include "bpf_skel/sample_filter.skel.h" diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 4814262e3805..7410a165f68b 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -10,8 +10,8 @@ #include "debug.h" #include "evlist.h" #include "expr.h" -#include "expr-bison.h" -#include "expr-flex.h" +#include +#include #include "util/hashmap.h" #include "smt.h" #include "tsc.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ac315e1be2bc..acddb2542b1a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -18,8 +18,8 @@ #include 
"debug.h" #include #include -#include "parse-events-bison.h" -#include "parse-events-flex.h" +#include +#include #include "pmu.h" #include "pmus.h" #include "asm/bug.h" diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7f984a7f16ca..b6654b9f55d2 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -19,8 +19,8 @@ #include "evsel.h" #include "pmu.h" #include "pmus.h" -#include "pmu-bison.h" -#include "pmu-flex.h" +#include +#include #include "parse-events.h" #include "print-events.h" #include "header.h" -- cgit v1.2.3-70-g09d2 From c76a1444c00ea6900a5e7d10065d6e93b0e104f9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 17:12:10 -0700 Subject: perf parse-event: Avoid BPF test SEGV loc is passed as NULL in tools/perf/tests/bpf.c do_test, meaning errors trigger a SEGV when trying to access. Add the missing NULL check. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Eduard Zingerman Cc: He Kuang Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rob Herring Cc: Wang Nan Cc: Wang ShaoBo Cc: YueHaibing Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230728001212.457900-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index acddb2542b1a..e17e96b4f798 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -757,7 +757,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_events_error__handle(parse_state->error, param.loc->first_column, + parse_events_error__handle(parse_state->error, param.loc ? 
param.loc->first_column : 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } -- cgit v1.2.3-70-g09d2 From 30f4ade33d649aa0e8603386721f184ad9d3cb55 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 17:12:11 -0700 Subject: perf tools: Revert enable indices setting syntax for BPF map This reverts commit e571e029bdbf ("perf tools: Enable indices setting syntax for BPF map"). The reverted commit added a notion of arrays that could be set as event terms for BPF events. The parsing hasn't worked over multiple Linux releases. Given the broken nature of the parsing it appears the code isn't in use, nor could I find a way for it to be used to add a test. The original commit contains a test in the commit message, however, running it yields: ``` $ perf record -e './test_bpf_map_3.c/map:channel.value[0,1,2,3...5]=101/' usleep 2 event syntax error: '..pf_map_3.c/map:channel.value[0,1,2,3...5]=101/' \___ parser error Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events ``` Given the code can't be used this commit reverts and removes it. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Eduard Zingerman Cc: He Kuang Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rob Herring Cc: Wang Nan Cc: Wang ShaoBo Cc: YueHaibing Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230728001212.457900-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 8 +-- tools/perf/util/parse-events.l | 11 ---- tools/perf/util/parse-events.y | 122 ----------------------------------------- 3 files changed, 1 insertion(+), 140 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e17e96b4f798..37614dc1d698 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -799,13 +799,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, parse_events_error__handle(parse_state->error, idx, strdup(errbuf), - strdup( -"Hint:\tValid config terms:\n" -" \tmap:[].value=[value]\n" -" \tmap:[].event=[event]\n" -"\n" -" \twhere is something like [0,3...5] or [all]\n" -" \t(add -v to see detail)")); + NULL); return err; } } diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 99335ec586ae..d7d084cc4140 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -175,7 +175,6 @@ do { \ %x mem %s config %x event -%x array group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* @@ -251,14 +250,6 @@ non_digit [^0-9] } } -{ -"]" { BEGIN(config); return ']'; } -{num_dec} { return value(yyscanner, 10); } -{num_hex} { return value(yyscanner, 16); } -, { return ','; } -"\.\.\." { return PE_ARRAY_RANGE; } -} - { /* * Please update config_term_names when new static term is added. 
@@ -302,8 +293,6 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } {lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } {lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } {name_minus} { return str(yyscanner, PE_NAME); } -\[all\] { return PE_ARRAY_ALL; } -"[" { BEGIN(array); return '['; } @{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); } } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 251b7d2fde32..c3517e3498d7 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -65,7 +65,6 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_LEGACY_CACHE %token PE_PREFIX_MEM %token PE_ERROR -%token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %token PE_TERM_HW %type PE_VALUE @@ -109,11 +108,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type groups %destructor { free_list_evsel ($$); } %type tracepoint_name -%destructor { free ($$.sys); free ($$.event); } -%type array -%type array_term -%type array_terms -%destructor { free ($$.ranges); } %type PE_TERM_HW %destructor { free ($$.str); } @@ -128,7 +122,6 @@ static void free_list_evsel(struct list_head* list_evsel) char *sys; char *event; } tracepoint_name; - struct parse_events_array array; struct hardware_term { char *str; u64 num; @@ -879,121 +872,6 @@ PE_TERM $$ = term; } -| -name_or_raw array '=' name_or_legacy -{ - struct parse_events_term *term; - int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, &@1, &@4); - - if (err) { - free($1); - free($4); - free($2.ranges); - PE_ABORT(err); - } - term->array = $2; - $$ = term; -} -| -name_or_raw array '=' PE_VALUE -{ - struct parse_events_term *term; - int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, false, &@1, &@4); - - if (err) { - free($1); - free($2.ranges); - PE_ABORT(err); - } - term->array = $2; - $$ = term; -} -| -PE_DRV_CFG_TERM -{ - struct 
parse_events_term *term; - char *config = strdup($1); - int err; - - if (!config) - YYNOMEM; - err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL); - if (err) { - free($1); - free(config); - PE_ABORT(err); - } - $$ = term; -} - -array: -'[' array_terms ']' -{ - $$ = $2; -} -| -PE_ARRAY_ALL -{ - $$.nr_ranges = 0; - $$.ranges = NULL; -} - -array_terms: -array_terms ',' array_term -{ - struct parse_events_array new_array; - - new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges; - new_array.ranges = realloc($1.ranges, - sizeof(new_array.ranges[0]) * - new_array.nr_ranges); - if (!new_array.ranges) - YYNOMEM; - memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges, - $3.nr_ranges * sizeof(new_array.ranges[0])); - free($3.ranges); - $$ = new_array; -} -| -array_term - -array_term: -PE_VALUE -{ - struct parse_events_array array; - - array.nr_ranges = 1; - array.ranges = malloc(sizeof(array.ranges[0])); - if (!array.ranges) - YYNOMEM; - array.ranges[0].start = $1; - array.ranges[0].length = 1; - $$ = array; -} -| -PE_VALUE PE_ARRAY_RANGE PE_VALUE -{ - struct parse_events_array array; - - if ($3 < $1) { - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - char *err_str; - - if (asprintf(&err_str, "Expected '%ld' to be less-than '%ld'", $3, $1) < 0) - err_str = NULL; - - parse_events_error__handle(error, @1.first_column, err_str, NULL); - YYABORT; - } - array.nr_ranges = 1; - array.ranges = malloc(sizeof(array.ranges[0])); - if (!array.ranges) - YYNOMEM; - array.ranges[0].start = $1; - array.ranges[0].length = $3 - $1 + 1; - $$ = array; -} sep_dc: ':' | -- cgit v1.2.3-70-g09d2 From c9b57eb8dcb097e32f9a73f88f6d13c57ce65b4d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 27 Jul 2023 17:12:12 -0700 Subject: perf parse-events: Remove array remnants parse_events_array was set up by event term parsing, which no longer exists. Remove this struct and references to it. 
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Eduard Zingerman Cc: He Kuang Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rob Herring Cc: Wang Nan Cc: Wang ShaoBo Cc: YueHaibing Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230728001212.457900-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 101 ----------------------------------------- tools/perf/util/parse-events.c | 8 ---- tools/perf/util/parse-events.h | 10 ---- 3 files changed, 119 deletions(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 8f4c76f2265a..50e42698cbb7 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1088,16 +1088,12 @@ enum bpf_map_op_type { enum bpf_map_key_type { BPF_MAP_KEY_ALL, - BPF_MAP_KEY_RANGES, }; struct bpf_map_op { struct list_head list; enum bpf_map_op_type op_type; enum bpf_map_key_type key_type; - union { - struct parse_events_array array; - } k; union { u64 value; struct evsel *evsel; @@ -1113,8 +1109,6 @@ bpf_map_op__delete(struct bpf_map_op *op) { if (!list_empty(&op->list)) list_del_init(&op->list); - if (op->key_type == BPF_MAP_KEY_RANGES) - parse_events__clear_array(&op->k.array); free(op); } @@ -1193,18 +1187,6 @@ bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term) if (!term) return 0; - if (term->array.nr_ranges) { - size_t memsz = term->array.nr_ranges * - sizeof(op->k.array.ranges[0]); - - op->k.array.ranges = memdup(term->array.ranges, memsz); - if (!op->k.array.ranges) { - pr_debug("Not enough memory to alloc indices for map\n"); - return -ENOMEM; - } - op->key_type = BPF_MAP_KEY_RANGES; - op->k.array.nr_ranges = term->array.nr_ranges; - } return 0; } @@ -1241,18 +1223,6 @@ bpf_map_op__clone(struct bpf_map_op *op) } INIT_LIST_HEAD(&newop->list); - if (op->key_type == BPF_MAP_KEY_RANGES) { - size_t memsz = op->k.array.nr_ranges * - 
sizeof(op->k.array.ranges[0]); - - newop->k.array.ranges = memdup(op->k.array.ranges, memsz); - if (!newop->k.array.ranges) { - pr_debug("Failed to alloc indices for map\n"); - free(newop); - return NULL; - } - } - return newop; } @@ -1453,40 +1423,6 @@ struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { {"event", bpf_map__config_event}, }; -static int -config_map_indices_range_check(struct parse_events_term *term, - struct bpf_map *map, - const char *map_name) -{ - struct parse_events_array *array = &term->array; - unsigned int i; - - if (!array->nr_ranges) - return 0; - if (!array->ranges) { - pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n", - map_name, (int)array->nr_ranges); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (!map) { - pr_debug("Map '%s' is invalid\n", map_name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - for (i = 0; i < array->nr_ranges; i++) { - unsigned int start = array->ranges[i].start; - size_t length = array->ranges[i].length; - unsigned int idx = start + length - 1; - - if (idx >= bpf_map__max_entries(map)) { - pr_debug("ERROR: index %d too large\n", idx); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; - } - } - return 0; -} - static int bpf__obj_config_map(struct bpf_object *obj, struct parse_events_term *term, @@ -1522,12 +1458,6 @@ bpf__obj_config_map(struct bpf_object *obj, goto out; } - *key_scan_pos += strlen(map_opt); - err = config_map_indices_range_check(term, map, map_name); - if (err) - goto out; - *key_scan_pos -= strlen(map_opt); - for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) { struct bpf_obj_config__map_func *func = &bpf_obj_config__map_funcs[i]; @@ -1576,7 +1506,6 @@ typedef int (*map_config_func_t)(const char *name, int map_fd, const struct bpf_map *map, struct bpf_map_op *op, void *pkey, void *arg); - static int foreach_key_array_all(map_config_func_t func, void *arg, const char *name, @@ -1597,32 +1526,6 @@ foreach_key_array_all(map_config_func_t func, return 0; 
} -static int -foreach_key_array_ranges(map_config_func_t func, void *arg, - const char *name, int map_fd, - const struct bpf_map *map, - struct bpf_map_op *op) -{ - unsigned int i, j; - int err; - - for (i = 0; i < op->k.array.nr_ranges; i++) { - unsigned int start = op->k.array.ranges[i].start; - size_t length = op->k.array.ranges[i].length; - - for (j = 0; j < length; j++) { - unsigned int idx = start + j; - - err = func(name, map_fd, map, op, &idx, arg); - if (err) { - pr_debug("ERROR: failed to insert value to %s[%u]\n", - name, idx); - return err; - } - } - } - return 0; -} static int bpf_map_config_foreach_key(struct bpf_map *map, @@ -1663,10 +1566,6 @@ bpf_map_config_foreach_key(struct bpf_map *map, err = foreach_key_array_all(func, arg, name, map_fd, map, op); break; - case BPF_MAP_KEY_RANGES: - err = foreach_key_array_ranges(func, arg, name, - map_fd, map, op); - break; default: pr_debug("ERROR: keytype for map '%s' invalid\n", name); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 37614dc1d698..52e9f062b26b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2716,9 +2716,6 @@ int parse_events_term__clone(struct parse_events_term **new, void parse_events_term__delete(struct parse_events_term *term) { - if (term->array.nr_ranges) - zfree(&term->array.ranges); - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) zfree(&term->val.str); @@ -2769,11 +2766,6 @@ void parse_events_terms__delete(struct list_head *terms) free(terms); } -void parse_events__clear_array(struct parse_events_array *a) -{ - zfree(&a->ranges); -} - void parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e59b33805886..b77ff619a623 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -81,17 +81,8 @@ enum { __PARSE_EVENTS__TERM_TYPE_NR, }; -struct parse_events_array { - 
size_t nr_ranges; - struct { - unsigned int start; - size_t length; - } *ranges; -}; - struct parse_events_term { char *config; - struct parse_events_array array; union { char *str; u64 num; @@ -162,7 +153,6 @@ int parse_events_term__clone(struct parse_events_term **new, void parse_events_term__delete(struct parse_events_term *term); void parse_events_terms__delete(struct list_head *terms); void parse_events_terms__purge(struct list_head *terms); -void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, const char *name); -- cgit v1.2.3-70-g09d2 From 11cb1ed477692320af116c543b47084cbb898026 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Tue, 25 Jul 2023 11:46:49 +0530 Subject: perf tests task_analyzer: Check perf build options for libtraceevent support Currently we depend on output of 'perf record -e "sched:sched_switch"', to check whether perf was built with libtraceevent support. Instead, a more straightforward approach can be to check the build options, using 'perf version --build-options', to check for libtraceevent support. When perf is compiled WITHOUT libtraceevent ('make NO_LIBTRACEEVENT=1'), 'perf version --build-options' outputs (output trimmed): ... libtraceevent: [ OFF ] # HAVE_LIBTRACEEVENT ... While, when perf is compiled WITH libtraceevent, 'perf version --build-options' outputs: ... libtraceevent: [ on ] # HAVE_LIBTRACEEVENT ... Committer notes: Removed one grep in the pipeline by combining the two into just one expression that covers the OFF + HAVE_LIBTRACEEVENT. 
Suggested-by: Ian Rogers Signed-off-by: Aditya Gupta Cc: Athira Rajeev Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230725061649.34937-1-adityag@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_task_analyzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh index 0095abbe20ca..92d15154ba79 100755 --- a/tools/perf/tests/shell/test_task_analyzer.sh +++ b/tools/perf/tests/shell/test_task_analyzer.sh @@ -52,7 +52,7 @@ find_str_or_fail() { # check if perf is compiled with libtraceevent support skip_no_probe_record_support() { - perf record -e "sched:sched_switch" -a -- sleep 1 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2 + perf version --build-options | grep -q " OFF .* HAVE_LIBTRACEEVENT" && return 2 return 0 } -- cgit v1.2.3-70-g09d2 From 38beba673b43b168906fff54f59cf004d2eb8120 Mon Sep 17 00:00:00 2001 From: Akanksha J N Date: Sun, 9 Jul 2023 23:57:35 +0530 Subject: perf tests trace+probe_vfs_getname: Fix shellcheck warnings about word splitting/quoting Running shellcheck -S on probe_vfs_getname.sh, throws below warnings: Before fix: $ shellcheck -S warning trace+probe_vfs_getname.sh In trace+probe_vfs_getname.sh line 13: . $(dirname $0)/lib/probe.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. In trace+probe_vfs_getname.sh line 18: . $(dirname $0)/lib/probe_vfs_getname.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. In trace+probe_vfs_getname.sh line 21: evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') ^-- SC2046 (warning): Quote this to prevent word splitting. Fix the shellcheck warnings by adding quotes to prevent word splitting. 
Signed-off-by: Akanksha J N Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 0a4bac3dd77e..db2ff141f703 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -10,15 +10,15 @@ # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo , 2017 -. $(dirname $0)/lib/probe.sh +. "$(dirname $0)"/lib/probe.sh skip_if_no_perf_probe || exit 2 skip_if_no_perf_trace || exit 2 -. $(dirname $0)/lib/probe_vfs_getname.sh +. "$(dirname $0)"/lib/probe_vfs_getname.sh trace_open_vfs_getname() { - evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" perf trace -e $evts touch $file 2>&1 | \ grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3-70-g09d2 From 7b485d9468903dea570542dc814f778d268162f6 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:36 +0530 Subject: perf tests probe_vfs_getname: Fix shellcheck warnings about word splitting/quoting Running shellcheck on probe_vfs_getname.sh throws below warning: In tests/shell/probe_vfs_getname.sh line 7: . 
$(dirname $0)/lib/probe.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. In tests/shell/probe_vfs_getname.sh line 11: . $(dirname $0)/lib/probe_vfs_getname.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. Fixed the warning by adding quotes to avoid word splitting. ShellCheck result with patch: # shellcheck -S warning probe_vfs_getname.sh # Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-3-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/probe_vfs_getname.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index 5d1b63d3f3e1..871243d6d03a 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -4,11 +4,11 @@ # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo , 2017 -. $(dirname $0)/lib/probe.sh +. "$(dirname $0)"/lib/probe.sh skip_if_no_perf_probe || exit 2 -. $(dirname $0)/lib/probe_vfs_getname.sh +. "$(dirname $0)"/lib/probe_vfs_getname.sh add_probe_vfs_getname || skip_if_no_debuginfo err=$? -- cgit v1.2.3-70-g09d2 From a225c3049791b1591debf07fa9f4377b2c331ecc Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:37 +0530 Subject: perf tests record_offcpu: Fix shellcheck warnings about word splitting/quoting and signal names case Running shellcheck on record_offcpu.sh throws below warning: In tests/shell/record_offcpu.sh line 13: trap - exit term int ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. 
^-^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. In tests/shell/record_offcpu.sh line 20: trap trap_cleanup exit term int ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. ^-^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. In tests/shell/record_offcpu.sh line 25: if [ `id -u` != 0 ] ^-----^ SC2046 (warning): Quote this to prevent word splitting. Fixed the warnings by: - Capitalize signals(INT, TERM, EXIT) to avoid mixed/lower case naming of signals. - Adding quotes to avoid word splitting. Result from shellcheck after patch changes: $ shellcheck -S warning record_offcpu.sh $ Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-4-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record_offcpu.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index f062ae9a95e1..a0d14cd0aa79 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -10,19 +10,19 @@ perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) cleanup() { rm -f ${perfdata} rm -f ${perfdata}.old - trap - exit term int + trap - EXIT TERM INT } trap_cleanup() { cleanup exit 1 } -trap trap_cleanup exit term int +trap trap_cleanup EXIT TERM INT test_offcpu_priv() { echo "Checking off-cpu privilege" - if [ `id -u` != 0 ] + if [ "$(id -u)" != 0 ] then echo "off-cpu test [Skipped permission]" err=2 -- cgit v1.2.3-70-g09d2 From edf197cb9da529ef854ba21311ddbaddc7098eba Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:38 +0530 
Subject: perf tests lock_contention: Fix shellcheck issue about quoting to avoid word splitting Running shellcheck on lock_contention.sh generates below warning: In tests/shell/lock_contention.sh line 24: if [ `id -u` != 0 ]; then ^-----^ SC2046 (warning): Quote this to prevent word splitting. In tests/shell/lock_contention.sh line 160: local type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//') ^--------^ SC3043 (warning): In POSIX sh, 'local' is undefined. ^--^ SC2155 (warning): Declare and assign separately to avoid masking return values. ^-- SC2046 (warning): Quote this to prevent word splitting. Fixed above warnings by: - Adding quotes to avoid word splitting. - Fixing shellcheck warnings for local usage, by prefixing function name to the variable. Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-5-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lock_contention.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index 4a194420416e..d120e83db7d9 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -21,7 +21,7 @@ trap_cleanup() { trap trap_cleanup EXIT TERM INT check() { - if [ `id -u` != 0 ]; then + if [ "$(id -u)" != 0 ]; then echo "[Skip] No root permission" err=2 exit @@ -157,10 +157,10 @@ test_lock_filter() perf lock contention -i ${perfdata} -L tasklist_lock -q 2> ${result} # find out the type of tasklist_lock - local type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//') + test_lock_filter_type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//') - if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then - echo "[Fail] 
Recorded result should not have non-${type} locks:" "$(cat "${result}")" + if [ "$(grep -c -v "${test_lock_filter_type}" "${result}")" != "0" ]; then + echo "[Fail] Recorded result should not have non-${test_lock_filter_type} locks:" "$(cat "${result}")" err=1 exit fi @@ -170,8 +170,8 @@ test_lock_filter() fi perf lock con -a -b -L tasklist_lock -q -- perf bench sched messaging > /dev/null 2> ${result} - if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then - echo "[Fail] BPF result should not have non-${type} locks:" "$(cat "${result}")" + if [ "$(grep -c -v "${test_lock_filter_type}" "${result}")" != "0" ]; then + echo "[Fail] BPF result should not have non-${test_lock_filter_type} locks:" "$(cat "${result}")" err=1 exit fi -- cgit v1.2.3-70-g09d2 From 0dd1f815545d7210150642741c364521cc5cf116 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:39 +0530 Subject: perf test stat_bpf_counters_cgrp: Fix shellcheck issue about logical operators Running shellcheck on stat_bpf_counters_cgrp.sh generates below warning: In stat_bpf_counters_cgrp.sh line 28: if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then ^-- SC2166 (warning): Prefer [ p ] && [ q ] as [ p -a q ] is not well defined. In stat_bpf_counters_cgrp.sh line 34: local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3) ^-------------^ SC3043 (warning): In POSIX sh, 'local' is undefined. ^-------^ SC2155 (warning): Declare and assign separately to avoid masking return values. ^-- SC2046 (warning): Quote this to prevent word splitting. In stat_bpf_counters_cgrp.sh line 51: local output ^----------^ SC3043 (warning): In POSIX sh, 'local' is undefined. In stat_bpf_counters_cgrp.sh line 65: local output ^----------^ SC3043 (warning): In POSIX sh, 'local' is undefined. Fixed above warnings by: - Changing the expression [p -a q] to [p] && [q]. - Fixing shellcheck warnings for local usage, by prefixing function name to the variable.
Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-6-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_bpf_counters_cgrp.sh | 28 ++++++++++-------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh index d724855d097c..a74440a00b6b 100755 --- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh +++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh @@ -25,22 +25,22 @@ check_bpf_counter() find_cgroups() { # try usual systemd slices first - if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then + if [ -d /sys/fs/cgroup/system.slice ] && [ -d /sys/fs/cgroup/user.slice ]; then test_cgroups="system.slice,user.slice" return fi # try root and self cgroups - local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3) - if [ -z ${self_cgrp} ]; then + find_cgroups_self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3) + if [ -z ${find_cgroups_self_cgrp} ]; then # cgroup v2 doesn't specify perf_event - self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3) + find_cgroups_self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3) fi - if [ -z ${self_cgrp} ]; then + if [ -z ${find_cgroups_self_cgrp} ]; then test_cgroups="/" else - test_cgroups="/,${self_cgrp}" + test_cgroups="/,${find_cgroups_self_cgrp}" fi } @@ -48,13 +48,11 @@ find_cgroups() # Just check if it runs without failure and has non-zero results. 
check_system_wide_counted() { - local output - - output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1 2>&1) - if echo ${output} | grep -q -F "&1) + if echo ${check_system_wide_counted_output} | grep -q -F "&1) - if echo ${output} | grep -q -F "&1) + if echo ${check_cpu_list_counted_output} | grep -q -F " Date: Sun, 9 Jul 2023 23:57:40 +0530 Subject: perf tests: Address signal case issues detected via shellcheck Running shellcheck -S on test_arm_spe_fork.sh throws below warnings: In tests/shell/test_arm_spe_fork.sh line 25: trap cleanup_files exit term int ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. ^--^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. ^-^ SC3049 (warning): In POSIX sh, using lower/mixed case for signal names is undefined. Fixed this issue by using uppercase for "EXIT", "TERM" and "INT" signals to avoid using lower/mixed case for signal names as input. Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-7-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_spe_fork.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh index fad361675a1d..1a7e6a82d0e3 100755 --- a/tools/perf/tests/shell/test_arm_spe_fork.sh +++ b/tools/perf/tests/shell/test_arm_spe_fork.sh @@ -22,7 +22,7 @@ cleanup_files() rm -f ${PERF_DATA} } -trap cleanup_files exit term int +trap cleanup_files EXIT TERM INT echo "Recording workload..."
perf record -o ${PERF_DATA} -e arm_spe/period=65536/ -vvv -- $TEST_PROGRAM > ${PERF_RECORD_LOG} 2>&1 & -- cgit v1.2.3-70-g09d2 From faae152aa6d237001e55bfcb9026cadf9531fc3c Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:41 +0530 Subject: perf tests stat+csv_summary: Fix unused variable references detected via shellcheck Running shellcheck on stat+csv_summary.sh throws below warnings: In tests/shell/stat+csv_summary.sh line 26: while read num event run pct ^-^ SC2034 (warning): num appears unused. Verify use (or export if used externally). ^---^ SC2034 (warning): event appears unused. Verify use (or export if used externally). ^-^ SC2034 (warning): run appears unused. Verify use (or export if used externally). ^-^ SC2034 (warning): pct appears unused. Verify use (or export if used externally). These variables are intentionally unused since they are needed to parse through the output. Use "_" as a prefix for these throw away variables. Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-8-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+csv_summary.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/stat+csv_summary.sh b/tools/perf/tests/shell/stat+csv_summary.sh index 5571ff75eb42..8bae9c8a835e 100755 --- a/tools/perf/tests/shell/stat+csv_summary.sh +++ b/tools/perf/tests/shell/stat+csv_summary.sh @@ -10,7 +10,7 @@ set -e # perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary 2>&1 | \ grep -e summary | \ -while read summary num event run pct +while read summary _num _event _run _pct do if [ $summary != "summary" ]; then exit 1 @@ -23,7 +23,7 @@ done # perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \ grep -e summary | \ -while read 
num event run pct +while read _num _event _run _pct do exit 1 done -- cgit v1.2.3-70-g09d2 From 38b3fa07f19e0c6b22ea1167d82b184bb0e0a830 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:42 +0530 Subject: perf tests perf_data_converter_json: Use quoting to avoid word splitting Running shellcheck on test_perf_data_converter_json.sh throws below warning: In tests/shell/test_perf_data_converter_json.sh line 42: if [ $(cat "${result}" | wc -l) -gt "0" ] ; then ^------------------------^ SC2046 (warning): Quote this to prevent word splitting. Fixed the warning by adding quotes to avoid word splitting. ShellCheck result with patch: # shellcheck -S warning test_perf_data_converter_json.sh # Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-9-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_perf_data_converter_json.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_perf_data_converter_json.sh b/tools/perf/tests/shell/test_perf_data_converter_json.sh index 72ac6c83231c..6ded58f98f55 100755 --- a/tools/perf/tests/shell/test_perf_data_converter_json.sh +++ b/tools/perf/tests/shell/test_perf_data_converter_json.sh @@ -39,7 +39,7 @@ test_json_converter_command() echo "Testing Perf Data Convertion Command to JSON" perf record -o "$perfdata" -F 99 -g -- perf test -w noploop > /dev/null 2>&1 perf data convert --to-json "$result" --force -i "$perfdata" >/dev/null 2>&1 - if [ $(cat "${result}" | wc -l) -gt "0" ] ; then + if [ "$(cat ${result} | wc -l)" -gt "0" ] ; then echo "Perf Data Converter Command to JSON [SUCCESS]" else echo "Perf Data Converter Command to JSON [FAILED]" -- cgit v1.2.3-70-g09d2 From d10eedd87bb3e7678c3a0f77a5cfe7150a318ee2 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date:
Sun, 9 Jul 2023 23:57:43 +0530 Subject: perf tests stat_bpf_counters: Fix usage of '==' to address shellcheck warning Running shellcheck on stat_bpf_counter.sh generates below warning: In tests/shell/stat_bpf_counters.sh line 34: if [ "$base_cycles" == "&1 | awk '/cycles/ {print $1}') -if [ "$bpf_cycles" == " Date: Sun, 9 Jul 2023 23:57:44 +0530 Subject: perf tests stat+shadow_stat: Fix shellcheck warning about unused variable Running shellcheck on stat+shadow_stat.sh generates below warning: In tests/shell/stat+shadow_stat.sh line 48: while read cpu num evt hash ipc rest ^--^ SC2034 (warning): hash appears unused. Verify use (or export if used externally). This variable is intentionally unused since it is needed to parse through the output. Use "_" as a prefix for this throw away variable. Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-11-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index 0e9cba84e757..a1918a15e36a 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -14,7 +14,7 @@ test_global_aggr() { perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ grep -e cycles -e instructions | \ - while read num evt hash ipc rest + while read num evt _hash ipc rest do # skip not counted events if [ "$num" = "&1 | \ grep ^CPU | \ - while read cpu num evt hash ipc rest + while read cpu num evt _hash ipc rest do # skip not counted events if [ "$num" = " Date: Sun, 9 Jul 2023 23:57:45 +0530 Subject: perf tests asm_pure_loop: Fix shellcheck warning about word splitting/quote Running shellcheck on asm_pure_loop.sh 
throws below warning: In coresight/asm_pure_loop.sh line 8: . $(dirname $0)/../lib/coresight.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. Fixed the warning by adding quotes to avoid word splitting. ShellCheck result with patch: # shellcheck -S warning coresight/asm_pure_loop.sh # Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-12-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/coresight/asm_pure_loop.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop.sh b/tools/perf/tests/shell/coresight/asm_pure_loop.sh index 569e9d46162b..779bc8608e1e 100755 --- a/tools/perf/tests/shell/coresight/asm_pure_loop.sh +++ b/tools/perf/tests/shell/coresight/asm_pure_loop.sh @@ -5,7 +5,7 @@ # Carsten Haitzler , 2021 TEST="asm_pure_loop" -. $(dirname $0)/../lib/coresight.sh +. "$(dirname $0)"/../lib/coresight.sh ARGS="" DATV="out" DATA="$DATD/perf-$TEST-$DATV.data" -- cgit v1.2.3-70-g09d2 From 5f83f1d58821660ced1c7330a6dd3ff013240f66 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:46 +0530 Subject: perf tests memcpy_thread_16k_10: Fix shellcheck warning about word splitting/quote Running shellcheck on memcpy_thread_16k_10.sh throws below warning: In memcpy_thread_16k_10.sh line 8: . $(dirname $0)/../lib/coresight.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. Fixed the warning by adding quotes to avoid word splitting. 
ShellCheck result with patch: # shellcheck -S warning coresight/memcpy_thread_16k_10.sh # Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-13-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh index d21ba8545938..08a44e52ce9b 100755 --- a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh +++ b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh @@ -5,7 +5,7 @@ # Carsten Haitzler , 2021 TEST="memcpy_thread" -. $(dirname $0)/../lib/coresight.sh +. "$(dirname $0)"/../lib/coresight.sh ARGS="16 10 1" DATV="16k_10" DATA="$DATD/perf-$TEST-$DATV.data" -- cgit v1.2.3-70-g09d2 From 05ef238cd05db7b3b2e596027c403964de1d3919 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:47 +0530 Subject: perf tests lib probe: Fix shellcheck warning about missing shebang Running shellcheck on probe.sh throws below warning: In lib/probe.sh line 1: ^-- SC2148 (error): Tips depend on target shell and yours is unknown. Add a shebang or a 'shell' directive. Fixed the warnings by adding shell directive.
Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-14-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh index 51e3f60baba0..5aa6e2ec5734 100644 --- a/tools/perf/tests/shell/lib/probe.sh +++ b/tools/perf/tests/shell/lib/probe.sh @@ -1,3 +1,4 @@ +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo , 2017 -- cgit v1.2.3-70-g09d2 From f188b2ce65730178f8dc0d105beb759719a12e6c Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:48 +0530 Subject: perf beauty arch_errno_names: Fix shellcheck issue about local variables Running shellcheck on arch_errno_names.sh generates below warning: In arch_errno_names.sh line 20: local arch="$1" ^--------^ SC3043 (warning): In POSIX sh, 'local' is undefined. ...... In arch_errno_names.sh line 61: local arch ^--------^ SC3043 (warning): In POSIX sh, 'local' is undefined. In arch_errno_names.sh line 67: printf '\t\treturn errno_to_name__%s(err);\n' $(arch_string "$arch") ^--------------------^ SC2046 (warning): Quote this to prevent word splitting. In arch_errno_names.sh line 69: printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default") ^-----------------------^ SC2046 (warning): Quote this to prevent word splitting. 
Fixed the warnings by: - Fixing shellcheck warnings for local usage, by removing local from the variable names - Adding quotes to avoid word splitting Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-15-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/arch_errno_names.sh | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index 37c53bac5f56..cc09dcaa891e 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -17,8 +17,7 @@ arch_string() asm_errno_file() { - local arch="$1" - local header + arch="$1" header="$toolsdir/arch/$arch/include/uapi/asm/errno.h" if test -r "$header"; then @@ -30,8 +29,7 @@ asm_errno_file() create_errno_lookup_func() { - local arch=$(arch_string "$1") - local nr name + arch=$(arch_string "$1") printf "static const char *errno_to_name__%s(int err)\n{\n\tswitch (err) {\n" $arch @@ -44,8 +42,8 @@ create_errno_lookup_func() process_arch() { - local arch="$1" - local asm_errno=$(asm_errno_file "$arch") + arch="$1" + asm_errno=$(asm_errno_file "$arch") $gcc $CFLAGS $include_path -E -dM -x c $asm_errno \ |grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \ @@ -56,9 +54,8 @@ process_arch() create_arch_errno_table_func() { - local archlist="$1" - local default="$2" - local arch + archlist="$1" + default="$2" printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n' printf '{\n' -- cgit v1.2.3-70-g09d2 From e936584214b93929eb41ec598959469e3a1f6079 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:49 +0530 Subject: perf build: Fix shellcheck issue about quotes for check-headers.sh Running 
shellcheck on check-headers.sh generates below warning: In check-headers.sh line 126: check_2 "tools/$file" "$file" $* ^-- SC2048 (warning): Use "$@" (with quotes) to prevent whitespace problems. In check-headers.sh line 134: check_2 "tools/perf/trace/beauty/$file" "$file" $* ^-- SC2048 (warning): Use "$@" (with quotes) to prevent whitespace problems. In check-headers.sh line 186: cd tools/perf ^-----------^ SC2164 (warning): Use 'cd ... || exit' or 'cd ... || return' in case cd fails. Fixed the warnings by: - Using "$@" instead of $* - Adding exit condition with cd command Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-16-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/check-headers.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a0f1d8adce60..4314c9197850 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -123,7 +123,7 @@ check () { shift - check_2 "tools/$file" "$file" $* + check_2 "tools/$file" "$file" "$@" } beauty_check () { @@ -131,7 +131,7 @@ beauty_check () { shift - check_2 "tools/perf/trace/beauty/$file" "$file" $* + check_2 "tools/perf/trace/beauty/$file" "$file" "$@" } # Check if we have the kernel headers (tools/perf/../../include), else @@ -183,7 +183,7 @@ done check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c -cd tools/perf +cd tools/perf || exit if [ ${#FAILURES[@]} -gt 0 ] then -- cgit v1.2.3-70-g09d2 From 5fe0531205688fe9bda0ce94628b133b0f94b229 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:50 +0530 Subject: perf tests thread_loop_check_tid_10: Fix shellcheck warnings about word splitting/quoting Fix the shellcheck warnings
for thread_loop_check_tid_10.sh In ./tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh line 8: . $(dirname $0)/../lib/coresight.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. Add quotes to prevent word splitting which are caused by unquoted command expansions. Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-17-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh index 7c13636fc778..c83a200dede4 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh @@ -5,7 +5,7 @@ # Carsten Haitzler , 2021 TEST="thread_loop" -. $(dirname $0)/../lib/coresight.sh +. "$(dirname $0)"/../lib/coresight.sh ARGS="10 1" DATV="check-tid-10th" DATA="$DATD/perf-$TEST-$DATV.data" -- cgit v1.2.3-70-g09d2 From b19de09bbee61aa9ef9fba392a9c8743019b858f Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:51 +0530 Subject: perf tests unroll_loop_thread_10: Fix shellcheck warnings about word splitting/quoting Fix the shellcheck warnings for unroll_loop_thread_10.sh Add quotes to prevent word splitting which are caused by unquoted command expansions. 
Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-18-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh index f48c85230b15..7304e3d3a6ff 100755 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh @@ -5,7 +5,7 @@ # Carsten Haitzler , 2021 TEST="unroll_loop_thread" -. $(dirname $0)/../lib/coresight.sh +. "$(dirname $0)"/../lib/coresight.sh ARGS="10" DATV="10" DATA="$DATD/perf-$TEST-$DATV.data" -- cgit v1.2.3-70-g09d2 From a5f3171b13525d106e15286d96a9b53dcf7d4fd5 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:52 +0530 Subject: perf tests lib probe_vfs_getname: Fix shellcheck warnings about missing shebang/local variables Running shellcheck on probe_vfs_getname fails with below warning: In ./tools/perf/tests/shell/lib/probe_vfs_getname.sh line 1: # Arnaldo Carvalho de Melo , 2017 ^-- SC2148 (error): Tips depend on target shell and yours is unknown. Add a shebang or a 'shell' directive. In ./tools/perf/tests/shell/lib/probe_vfs_getname.sh line 14: local verbose=$1 ^-----------^ SC3043 (warning): In POSIX sh, 'local' is undefined. 
Fix this: - by adding shebang in the beginning of the file and - rename variable verbose to "add_probe_verbose" after removing local Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-19-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 60c5e34f90c4..bf4c1fb71c4b 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -1,3 +1,4 @@ +#!/bin/sh # Arnaldo Carvalho de Melo , 2017 perf probe -l 2>&1 | grep -q probe:vfs_getname @@ -10,11 +11,11 @@ cleanup_probe_vfs_getname() { } add_probe_vfs_getname() { - local verbose=$1 + add_probe_verbose=$1 if [ $had_vfs_getname -eq 1 ] ; then line=$(perf probe -L getname_flags 2>&1 | grep -E 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \ - perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring" + perf probe $add_probe_verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring" fi } -- cgit v1.2.3-70-g09d2 From 1e094f925e1ec2825586b6a7c15f90afed9c1468 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:53 +0530 Subject: perf tests lib waiting: Fix the shellcheck warnings about missing shebang Running shellcheck in "lib/waiting.sh" generates below warning: In ./tools/perf/tests/shell/lib/waiting.sh line 1: # SPDX-License-Identifier: GPL-2.0 ^-- SC2148 (error): Tips depend on target shell and yours is unknown. Add a shebang or a 'shell' directive. 
Fix this by adding shebang in the beginning of the script. Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-20-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/waiting.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/shell/lib/waiting.sh b/tools/perf/tests/shell/lib/waiting.sh index e7a39134a68e..bdd5a7c71591 100644 --- a/tools/perf/tests/shell/lib/waiting.sh +++ b/tools/perf/tests/shell/lib/waiting.sh @@ -1,3 +1,4 @@ +#!/bin/sh # SPDX-License-Identifier: GPL-2.0 tenths=date\ +%s%1N -- cgit v1.2.3-70-g09d2 From 5e9310ae235bc304a522f14a7fce6293e3cb9d14 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:54 +0530 Subject: perf trace x86_arch_prctl: Address shellcheck warnings about local variables Running shellcheck on x86_arch_prctl.sh generates below warning: In ./tools/perf/trace/beauty/x86_arch_prctl.sh line 10: local idx=$1 ^-------^ SC3043 (warning): In POSIX sh, 'local' is undefined. In ./tools/perf/trace/beauty/x86_arch_prctl.sh line 11: local prefix=$2 ^----------^ SC3043 (warning): In POSIX sh, 'local' is undefined. In ./tools/perf/trace/beauty/x86_arch_prctl.sh line 12: local first_entry=$3 ^---------------^ SC3043 (warning): In POSIX sh, 'local' is undefined. 
Fix this by removing 'local', since these variables are used only within this specific function. Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-21-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/x86_arch_prctl.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/trace/beauty/x86_arch_prctl.sh b/tools/perf/trace/beauty/x86_arch_prctl.sh index fd5c740512c5..b1596df251f0 100755 --- a/tools/perf/trace/beauty/x86_arch_prctl.sh +++ b/tools/perf/trace/beauty/x86_arch_prctl.sh @@ -7,9 +7,9 @@ prctl_arch_header=${x86_header_dir}/prctl.h print_range () { - local idx=$1 - local prefix=$2 - local first_entry=$3 + idx=$1 + prefix=$2 + first_entry=$3 printf "#define x86_arch_prctl_codes_%d_offset %s\n" $idx $first_entry printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx -- cgit v1.2.3-70-g09d2 From 84caba70d09c20638ee1ecdd24e0932520ad63fe Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:55 +0530 Subject: perf arch x86: Address shellcheck warnings about unused variables in syscalltbl.sh Running shellcheck on syscalltbl.sh generates below warning: In ./tools/perf/arch/x86/entry/syscalls/syscalltbl.sh line 27: while read nr abi name entry compat; do ^-^ SC2034 (warning): abi appears unused. Verify use (or export if used externally). ^----^ SC2034 (warning): compat appears unused. Verify use (or export if used externally). These variables are intentionally unused since they are needed to parse through the output. Use "_" as a prefix for these throw away variables. 
Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-22-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/entry/syscalls/syscalltbl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh index fa526a993845..59d7914ed6bb 100755 --- a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh +++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh @@ -24,7 +24,7 @@ sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX) grep '^[0-9]' "$in" | sort -n > $sorted_table max_nr=0 -while read nr abi name entry compat; do +while read nr _abi name entry _compat; do if [ $nr -ge 512 ] ; then # discard compat sycalls break fi -- cgit v1.2.3-70-g09d2 From 3a4367c11884a0be3a74963b37d2b2fe243ee1d4 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:56 +0530 Subject: perf tests record+zstd_comp_decomp: Fix the shellcheck warnings about word splitting/quoting Running shellcheck on record+zstd_comp_decomp.sh testcases throws below warning: In tests/shell/record+zstd_comp_decomp.sh line 16: $perf_tool record -o $trace_file $gflag -z -F 5000 -- \ ^---------^ SC2086 (info): Double quote to prevent globbing and word splitting. Did you mean: $perf_tool record -o "$trace_file" $gflag -z -F 5000 -- \ In tests/shell/record+zstd_comp_decomp.sh line 22: $perf_tool report -i $trace_file --header --stats | \ ^---------^ SC2086 (info): Double quote to prevent globbing and word splitting. Added double quote around file names to fix these shellcheck reported issues. 
Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-23-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+zstd_comp_decomp.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh index 49bd875d5122..8929046e9057 100755 --- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh @@ -13,25 +13,25 @@ skip_if_no_z_record() { collect_z_record() { echo "Collecting compressed record file:" [ "$(uname -m)" != s390x ] && gflag='-g' - $perf_tool record -o $trace_file $gflag -z -F 5000 -- \ + $perf_tool record -o "$trace_file" $gflag -z -F 5000 -- \ dd count=500 if=/dev/urandom of=/dev/null } check_compressed_stats() { echo "Checking compressed events stats:" - $perf_tool report -i $trace_file --header --stats | \ + $perf_tool report -i "$trace_file" --header --stats | \ grep -E "(# compressed : Zstd,)|(COMPRESSED events:)" } check_compressed_output() { - $perf_tool inject -i $trace_file -o $trace_file.decomp && - $perf_tool report -i $trace_file --stdio -F comm,dso,sym | head -n -3 > $trace_file.comp.output && - $perf_tool report -i $trace_file.decomp --stdio -F comm,dso,sym | head -n -3 > $trace_file.decomp.output && - diff $trace_file.comp.output $trace_file.decomp.output + $perf_tool inject -i "$trace_file" -o "$trace_file.decomp" && + $perf_tool report -i "$trace_file" --stdio -F comm,dso,sym | head -n -3 > "$trace_file.comp.output" && + $perf_tool report -i "$trace_file.decomp" --stdio -F comm,dso,sym | head -n -3 > "$trace_file.decomp.output" && + diff "$trace_file.comp.output" "$trace_file.decomp.output" } skip_if_no_z_record || exit 2 collect_z_record && 
check_compressed_stats && check_compressed_output err=$? -rm -f $trace_file* +rm -f "$trace_file*" exit $err -- cgit v1.2.3-70-g09d2 From 1f14b8af2c9c5ec43a834c960f436721253ff592 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:57 +0530 Subject: perf tests coresight thread_loop_check_tid_2: Fix shellcheck warnings about word splitting/quoting Running shellcheck on thread_loop_check_tid_2.sh throws below warning: In tests/shell/coresight/thread_loop_check_tid_2.sh line 8: . $(dirname $0)/../lib/coresight.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. Fixed the warning by adding quotes to avoid word splitting. Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-24-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh index a067145af43c..6346fd5e87c8 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh @@ -5,7 +5,7 @@ # Carsten Haitzler , 2021 TEST="thread_loop" -. $(dirname $0)/../lib/coresight.sh +. "$(dirname $0)"/../lib/coresight.sh ARGS="2 20" DATV="check-tid-2th" DATA="$DATD/perf-$TEST-$DATV.data" -- cgit v1.2.3-70-g09d2 From eef1fb50caba2a4c508ffede6e98695c013ca1df Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sun, 9 Jul 2023 23:57:58 +0530 Subject: perf tests lib stat_output: Fix shellcheck warning about missing shebang Running shellcheck on stat_output.sh throws below warning: In tests/shell/lib/stat_output.sh line 1: ^-- SC2148 (error): Tips depend on target shell and yours is unknown. 
Add a shebang or a 'shell' directive. Fixed the warning by adding shell directive. Signed-off-by: Kajol Jain Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-25-atrajeev@linux.vnet.ibm.com Signed-off-by: Athira Rajeev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/stat_output.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh index 698343f0ecf9..3cc158a64326 100644 --- a/tools/perf/tests/shell/lib/stat_output.sh +++ b/tools/perf/tests/shell/lib/stat_output.sh @@ -1,3 +1,4 @@ +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Return true if perf_event_paranoid is > $1 and not running as root. -- cgit v1.2.3-70-g09d2 From 8439b44abb0c2e6522823e0e20ae21feb882b408 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:57:59 +0530 Subject: perf tests stat+csv_output: Fix shellcheck warnings about word splitting/quoting Running shellcheck on stat+csv_output.sh throws below warning: In tests/shell/stat+csv_output.sh line 9: . $(dirname $0)/lib/stat_output.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. Fixed the warning by adding quotes to avoid word splitting. 
Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-26-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+csv_output.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh index 34a0701fee05..d890eb26e914 100755 --- a/tools/perf/tests/shell/stat+csv_output.sh +++ b/tools/perf/tests/shell/stat+csv_output.sh @@ -6,7 +6,7 @@ set -e -. $(dirname $0)/lib/stat_output.sh +. "$(dirname $0)"/lib/stat_output.sh csv_sep=@ -- cgit v1.2.3-70-g09d2 From 35578a551b757cd00afe9b81406363f85cab16b2 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sun, 9 Jul 2023 23:58:00 +0530 Subject: perf tests stat+std_output: Fix shellcheck warnings about word splitting/quoting and local variables Running shellcheck on stat_std_output testcase throws below warning: In tests/shell/stat+std_output.sh line 9: . $(dirname $0)/lib/stat_output.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. In tests/shell/stat+std_output.sh line 32: local -i cnt=0 ^-^ SC2034 (warning): cnt appears unused. Verify use (or export if used externally). Fixed the warning by adding quotes to avoid word splitting and removed unused variable "cnt" at line 32. 
Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230709182800.53002-27-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+std_output.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh index f972b31fa0c2..fb2b10547a11 100755 --- a/tools/perf/tests/shell/stat+std_output.sh +++ b/tools/perf/tests/shell/stat+std_output.sh @@ -6,7 +6,7 @@ set -e -. $(dirname $0)/lib/stat_output.sh +. "$(dirname $0)"/lib/stat_output.sh stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX) @@ -28,7 +28,6 @@ trap trap_cleanup EXIT TERM INT function commachecker() { - local -i cnt=0 local prefix=1 case "$1" -- cgit v1.2.3-70-g09d2 From ed847e30f001b207013b6136c264454d7560557f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Aug 2023 09:36:08 -0300 Subject: perf test bpf: Address error about non-null argument for epoll_pwait 2nd arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First noticed on Fedora Rawhide: tests/bpf.c: In function ‘epoll_pwait_loop’: tests/bpf.c:36:17: error: argument 2 null where non-null expected [-Werror=nonnull] 36 | epoll_pwait(-(i + 1), NULL, 0, 0, NULL); | ^~~~~~~~~~~ In file included from tests/bpf.c:5: /usr/include/sys/epoll.h:134:12: note: in a call to function ‘epoll_pwait’ declared ‘nonnull’ 134 | extern int epoll_pwait (int __epfd, struct epoll_event *__events, | ^~~~~~~~~~~ [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ gcc -v Using built-in specs. 
COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/13/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-redhat-linux Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,fortran,objc,obj-c++,ada,go,d,m2,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only --enable-libstdcxx-backtrace --with-libstdcxx-zoneinfo=/usr/share/zoneinfo --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --with-isl=/builddir/build/BUILD/gcc-13.2.1-20230728/obj-x86_64-redhat-linux/isl-install --enable-offload-targets=nvptx-none --without-cuda-driver --enable-offload-defaulted --enable-gnu-indirect-function --enable-cet --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux --with-build-config=bootstrap-lto --enable-link-serialization=1 Thread model: posix Supported LTO compression algorithms: zlib zstd gcc version 13.2.1 20230728 (Red Hat 13.2.1-1) (GCC) [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ Just add that argument to address this compiler warning. 
Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZMj8+bvN86D0ZKiB@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 31796f2a80f4..9ccecd873ecd 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -29,11 +29,12 @@ static int epoll_pwait_loop(void) { + struct epoll_event events; int i; /* Should fail NR_ITERS times */ for (i = 0; i < NR_ITERS; i++) - epoll_pwait(-(i + 1), NULL, 0, 0, NULL); + epoll_pwait(-(i + 1), &events, 0, 0, NULL); return 0; } -- cgit v1.2.3-70-g09d2 From e8ca4f0f8c03330bf912daee6aa258f4d33ee724 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Jul 2023 23:19:30 +0900 Subject: perf probe: Show correct error message about @symbol usage for uprobe Since @symbol variable access is not supported by uprobe events, perf must correctly warn the user about that instead of suggesting a kernel version update. Committer testing: With/without the patch: [root@quaco ~]# perf probe -x ~/bin/perf -L sigtrap_handler 0 sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused) 1 { 2 if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED)) 3 ctx.first_siginfo = *info; 4 __atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED); 5 } static void *test_thread(void *arg) { [root@quaco ~]# perf probe -x ~/bin/perf sigtrap_handler:4 "ctx.signal_count" Without the patch: [root@quaco ~]# perf probe -x ~/bin/perf sigtrap_handler:4 "ctx.signal_count" Failed to write event: Invalid argument Please upgrade your kernel to at least 3.14 to have access to feature @ctx Error: Failed to add events. [root@quaco ~]# With the patch: [root@quaco ~]# Failed to write event: Invalid argument @ctx accesses a variable by symbol name, but that is not supported for user application probe. Error: Failed to add events. 
[root@quaco ~]# Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Closes: https://lore.kernel.org/all/ZLWDEjvFjrrEJODp@kernel.org/ Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/169055397023.67089.12693645664676964310.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 16822a8a540f..2d056f02ae40 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2800,13 +2800,18 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev) if (!tev->uprobes || tev->nargs == 0 || !buf) goto out; - for (i = 0; i < tev->nargs; i++) - if (strglobmatch(tev->args[i].value, "[$@+-]*")) { - pr_warning("Please upgrade your kernel to at least " - "3.14 to have access to feature %s\n", + for (i = 0; i < tev->nargs; i++) { + if (strchr(tev->args[i].value, '@')) { + pr_warning("%s accesses a variable by symbol name, but that is not supported for user application probe.\n", + tev->args[i].value); + break; + } + if (strglobmatch(tev->args[i].value, "[$+-]*")) { + pr_warning("Please upgrade your kernel to at least 3.14 to have access to feature %s\n", tev->args[i].value); break; } + } out: free(buf); } -- cgit v1.2.3-70-g09d2 From 714b4511114254c9cf143dd31c4d4251129fb0a5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 31 Jul 2023 22:36:31 -0700 Subject: perf parse-events x86: Avoid sorting uops_retired.slots As topdown.slots may appear as slots it may get confused with uops_retired.slots which is an invalid perf metric event group leader. Special case uops_retired.slots to avoid this confusion. 
Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Weilin Wang Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20230801053634.1142634-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/evlist.c | 7 ++++--- tools/perf/arch/x86/util/evsel.c | 7 +++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index cbd582182932..b1ce0c52d88d 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -75,11 +75,12 @@ int arch_evlist__add_default_attrs(struct evlist *evlist, int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) { - if (topdown_sys_has_perf_metrics() && evsel__sys_has_perf_metrics(lhs)) { + if (topdown_sys_has_perf_metrics() && + (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) { /* Ensure the topdown slots comes first. */ - if (strcasestr(lhs->name, "slots")) + if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots")) return -1; - if (strcasestr(rhs->name, "slots")) + if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots")) return 1; /* Followed by topdown events. 
*/ if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown")) diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 81d22657922a..090d0f371891 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -40,12 +40,11 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel) bool arch_evsel__must_be_in_group(const struct evsel *evsel) { - if (!evsel__sys_has_perf_metrics(evsel)) + if (!evsel__sys_has_perf_metrics(evsel) || !evsel->name || + strcasestr(evsel->name, "uops_retired.slots")) return false; - return evsel->name && - (strcasestr(evsel->name, "slots") || - strcasestr(evsel->name, "topdown")); + return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots"); } int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) -- cgit v1.2.3-70-g09d2 From ab0cfb796e03b24584bdb110111f1a290eb0df05 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 31 Jul 2023 22:36:32 -0700 Subject: perf vendor events intel: Update meteorlake to 1.04 1.04 events were released in: https://github.com/intel/perfmon/commit/44fe3681501f43fc515577aced8e944b187c8e51 Addition of 51 core events. 
Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Weilin Wang Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20230801053634.1142634-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- .../perf/pmu-events/arch/x86/meteorlake/cache.json | 165 +++++++++++++++++++++ .../arch/x86/meteorlake/floating-point.json | 8 + .../pmu-events/arch/x86/meteorlake/frontend.json | 56 +++++++ .../pmu-events/arch/x86/meteorlake/memory.json | 80 ++++++++++ .../perf/pmu-events/arch/x86/meteorlake/other.json | 16 ++ .../pmu-events/arch/x86/meteorlake/pipeline.json | 159 ++++++++++++++++++++ 7 files changed, 485 insertions(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 6650100830c4..9020d7a23c91 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -19,7 +19,7 @@ GenuineIntel-6-3A,v24,ivybridge,core GenuineIntel-6-3E,v23,ivytown,core GenuineIntel-6-2D,v23,jaketown,core GenuineIntel-6-(57|85),v10,knightslanding,core -GenuineIntel-6-A[AC],v1.03,meteorlake,core +GenuineIntel-6-A[AC],v1.04,meteorlake,core GenuineIntel-6-1[AEF],v3,nehalemep,core GenuineIntel-6-2E,v3,nehalemex,core GenuineIntel-6-A7,v1.01,rocketlake,core diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json index e1ae7c92f38e..1de0200b32f6 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json @@ -36,6 +36,15 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.L2_STALLS", + 
"PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "Number of L1D misses that are outstanding", "EventCode": "0x48", @@ -260,6 +269,15 @@ "UMask": "0x40", "Unit": "cpu_core" }, + { + "BriefDescription": "Cycles when L1D is locked", + "EventCode": "0x42", + "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION", + "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", "EventCode": "0x2e", @@ -514,6 +532,17 @@ "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "Data_LA": "1", + "EventCode": "0xd2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required", "Data_LA": "1", @@ -730,6 +759,14 @@ "UMask": "0x1", "Unit": "cpu_atom" }, + { + "BriefDescription": "MEM_STORE_RETIRED.L2_HIT", + "EventCode": "0x44", + "EventName": "MEM_STORE_RETIRED.L2_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of load ops retired.", "Data_LA": "1", @@ -977,6 +1014,15 @@ "UMask": "0x8", "Unit": "cpu_core" }, + { + 
"BriefDescription": "Cacheable and Non-Cacheable code read requests", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Demand Data Read requests sent to uncore", "EventCode": "0x21", @@ -995,6 +1041,89 @@ "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where at least 1 outstanding demand data read request is pending.", + "CounterMask": "1", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. 
An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.", "EventCode": "0x2c", @@ -1004,6 +1133,42 @@ "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "EventCode": "0x40", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of PREFETCHW instructions executed.", + "EventCode": "0x40", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "PublicDescription": "Counts the number of PREFETCHW instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x8", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "EventCode": "0x40", + "EventName": "SW_PREFETCH_ACCESS.T0", + "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "EventCode": "0x40", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", + "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss", "EventCode": "0x71", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json index 616489f0974a..f66506ee37ef 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json +++ 
b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json @@ -41,6 +41,14 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.PORT_5", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "EventCode": "0xc7", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json index 0f064518d1c0..8264419500a5 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json @@ -43,6 +43,14 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "DSB_FILL.FB_STALL_OT", + "EventCode": "0x62", + "EventName": "DSB_FILL.FB_STALL_OT", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, { "BriefDescription": "Retired ANT branches", "EventCode": "0xc6", @@ -55,6 +63,30 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Retired Instructions who experienced DSB miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x1", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. 
the decoded instruction-cache) miss.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x11", + "PEBS": "1", + "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss", "EventCode": "0xc6", @@ -88,6 +120,18 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.L2_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x13", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, { "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", "EventCode": "0xc6", @@ -243,6 +287,18 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.STLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x15", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", + "SampleAfterValue": "100007", + "UMask": "0x3", + "Unit": "cpu_core" + }, { "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH", "EventCode": "0xc6", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json 
b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json index 67e949b4c789..2605e1d0ba9f 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json @@ -66,6 +66,15 @@ "UMask": "0x84", "Unit": "cpu_atom" }, + { + "BriefDescription": "Number of machine clears due to memory ordering conflicts.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PublicDescription": "Counts the number of Machine Clears detected due to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", "CounterMask": "3", @@ -95,6 +104,35 @@ "UMask": "0x9", "Unit": "cpu_core" }, + { + "BriefDescription": "MEMORY_ORDERING.MD_NUKE", + "EventCode": "0x09", + "EventName": "MEMORY_ORDERING.MD_NUKE", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of memory ordering machine clears due to memory renaming.", + "EventCode": "0x09", + "EventName": "MEMORY_ORDERING.MRN_NUKE", + "SampleAfterValue": "100003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.
Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "53", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Data_LA": "1", @@ -121,6 +159,19 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "23", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "Data_LA": "1", @@ -235,5 +286,34 @@ "SampleAfterValue": "100003", "UMask": "0x10", "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where data return is pending for a Demand Data Read request who miss L3 cache.", + "CounterMask": "1", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "SampleAfterValue": "1000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. 
Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache.", + "CounterMask": "6", + "EventCode": "0x20", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "PublicDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache. Note that this event does not capture all elapsed cycles while the requests are outstanding - only cycles from when the requests were known to have missed the L3 cache.", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" } ] diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/other.json b/tools/perf/pmu-events/arch/x86/meteorlake/other.json index 2ec57f487525..f4c603599df4 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/other.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/other.json @@ -1,4 +1,12 @@ [ + { + "BriefDescription": "ASSISTS.PAGE_FAULT", + "EventCode": "0xc1", + "EventName": "ASSISTS.PAGE_FAULT", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts streaming stores that have any type of response.", "EventCode": "0x2A,0x2B", @@ -30,6 +38,14 @@ "UMask": "0x7", "Unit": "cpu_core" }, + { + "BriefDescription": "RS.EMPTY_RESOURCE", + "EventCode": "0xa5", + "EventName": "RS.EMPTY_RESOURCE", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. 
For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)", "EventCode": "0x75", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json index eeaa7a97f71c..352c5efafc06 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json @@ -311,6 +311,16 @@ "UMask": "0x60", "Unit": "cpu_core" }, + { + "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RET", + "PEBS": "1", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x8", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", "EventCode": "0xc5", @@ -329,6 +339,33 @@ "UMask": "0x48", "Unit": "cpu_core" }, + { + "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.", + "EventCode": "0xec", + "EventName": "CPU_CLK_UNHALTED.C01", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x10", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.", + "EventCode": "0xec", + "EventName": "CPU_CLK_UNHALTED.C02", + "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. 
This state can be entered via the TPAUSE or UMWAIT instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x20", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.", + "EventCode": "0xec", + "EventName": "CPU_CLK_UNHALTED.C0_WAIT", + "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.", + "SampleAfterValue": "2000003", + "UMask": "0x70", + "Unit": "cpu_core" + }, { "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles", "EventName": "CPU_CLK_UNHALTED.CORE", @@ -361,6 +398,24 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "CPU_CLK_UNHALTED.PAUSE", + "EventCode": "0xec", + "EventName": "CPU_CLK_UNHALTED.PAUSE", + "SampleAfterValue": "2000003", + "UMask": "0x40", + "Unit": "cpu_core" + }, + { + "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xec", + "EventName": "CPU_CLK_UNHALTED.PAUSE_INST", + "SampleAfterValue": "2000003", + "UMask": "0x40", + "Unit": "cpu_core" + }, { "BriefDescription": "Core crystal clock cycles. 
Cycle counts are evenly distributed between active threads in the Core.", "EventCode": "0x3c", @@ -602,6 +657,15 @@ "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Retired NOP instructions.", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.NOP", + "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Precise instruction retired with PEBS precise-distribution", "EventName": "INST_RETIRED.PREC_DIST", @@ -611,6 +675,15 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Iterations of Repeat string retired instructions.", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.REP_ITERATION", + "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. 
Note the number of iterations is implementation-dependent.", + "SampleAfterValue": "2000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, { "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.", "CounterMask": "1", @@ -621,6 +694,17 @@ "UMask": "0x3", "Unit": "cpu_core" }, + { + "BriefDescription": "Clears speculative count", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0xad", + "EventName": "INT_MISC.CLEARS_COUNT", + "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears", + "SampleAfterValue": "500009", + "UMask": "0x1", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.", "EventCode": "0xad", @@ -630,6 +714,15 @@ "UMask": "0x80", "Unit": "cpu_core" }, + { + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "EventCode": "0xad", + "EventName": "INT_MISC.RAT_STALLS", + "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. 
This also includes the cycles during which the Allocator is serving another thread.", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_core" + }, { "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread", "EventCode": "0xad", @@ -733,6 +826,15 @@ "UMask": "0x4", "Unit": "cpu_atom" }, + { + "BriefDescription": "False dependencies in MOB due to partial compare on address.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.ADDRESS_ALIAS", + "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", "EventCode": "0x03", @@ -742,6 +844,15 @@ "UMask": "0x1", "Unit": "cpu_atom" }, + { + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.NO_SR", + "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "SampleAfterValue": "100003", + "UMask": "0x88", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.", "EventCode": "0x03", @@ -751,6 +862,15 @@ "UMask": "0x2", "Unit": "cpu_atom" }, + { + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. 
The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x82", + "Unit": "cpu_core" + }, { "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "CounterMask": "1", @@ -823,6 +943,24 @@ "UMask": "0x1", "Unit": "cpu_atom" }, + { + "BriefDescription": "Self-modifying code (SMC) detected.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "LFENCE instructions retired", + "EventCode": "0xe0", + "EventName": "MISC2_RETIRED.LFENCE", + "PublicDescription": "number of LFENCE retired instructions", + "SampleAfterValue": "400009", + "UMask": "0x20", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.", "EventCode": "0xa2", @@ -1260,6 +1398,16 @@ "UMask": "0x1", "Unit": "cpu_core" }, + { + "BriefDescription": "Cycles with retired uop(s).", + "CounterMask": "1", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.CYCLES", + "PublicDescription": "Counts cycles where at least one uop has retired.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { "BriefDescription": "Retired uops except the last uop of each instruction.", "EventCode": "0xc2", @@ -1306,6 +1454,17 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Cycles without actually retired uops.", + "CounterMask": "1", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.STALLS", + "Invert": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, { 
"BriefDescription": "Cycles with less than 10 actually retired uops.", "CounterMask": "10", -- cgit v1.2.3-70-g09d2 From b691f30700b56fe4fba690e17b1e1b2eb327a589 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 31 Jul 2023 22:36:33 -0700 Subject: perf vendor events intel: Update sapphirerapids to 1.15 1.15 events were released in: https://github.com/intel/perfmon/commit/76dfb81a1148ec049fd9caae9c62529404da63df Adds the events OCR.DEMAND_DATA_RD.LOCAL_SOCKET_PMM and OCR.DEMAND_DATA_RD.PMM. Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Weilin Wang Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20230801053634.1142634-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- .../perf/pmu-events/arch/x86/sapphirerapids/other.json | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 9020d7a23c91..3a8770e29fe8 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -24,7 +24,7 @@ GenuineIntel-6-1[AEF],v3,nehalemep,core GenuineIntel-6-2E,v3,nehalemex,core GenuineIntel-6-A7,v1.01,rocketlake,core GenuineIntel-6-2A,v19,sandybridge,core -GenuineIntel-6-(8F|CF),v1.14,sapphirerapids,core +GenuineIntel-6-(8F|CF),v1.15,sapphirerapids,core GenuineIntel-6-AF,v1.00,sierraforest,core GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json index 31b6be9fb8c7..442ef3807a9d 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/other.json @@ -76,6 
+76,24 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, + { + "BriefDescription": "Counts demand data reads that were supplied by PMM attached to this socket, whether or not in Sub NUMA Cluster(SNC) Mode. In SNC Mode counts PMM accesses that are controlled by the close or distant SNC Cluster.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.LOCAL_SOCKET_PMM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x700C00001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by PMM.", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.PMM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x703C00001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, { "BriefDescription": "Counts demand data reads that were supplied by DRAM attached to another socket.", "EventCode": "0x2A,0x2B", -- cgit v1.2.3-70-g09d2 From 9a7d82c188baea8049e62cc1c92eb5b1846ed4ad Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 31 Jul 2023 22:36:34 -0700 Subject: perf vendor events intel: Update Icelake+ metric constraints Avoid grouping events especially in cases where the kernel's PMU driver fails to not open the events, causing the events to report back as "<not counted>".
This update comes from: https://github.com/intel/perfmon/pull/94 Fixes issues reported with patch: https://lore.kernel.org/lkml/20230719001836.198363-3-irogers@google.com/ Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Weilin Wang Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20230801053634.1142634-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json | 11 +++++++---- tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json | 2 ++ tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json | 10 ++++++---- tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json | 10 ++++++---- tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json | 10 ++++++---- .../perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json | 9 +++++---- tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json | 10 ++++++---- 7 files changed, 38 insertions(+), 24 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index daf9458f0b77..c6780d5c456b 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -558,6 +558,7 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -800,6 +801,7 @@ }, { 
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", @@ -1058,7 +1060,6 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -1230,6 +1231,7 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -1267,6 +1269,7 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": 
"tma_info_bottleneck_memory_bandwidth", @@ -1355,7 +1358,6 @@ }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks", "MetricGroup": "Flops;Ret", "MetricName": "tma_info_core_flopc", @@ -1363,7 +1365,6 @@ }, { "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu_core@FP_ARITH_DISPATCHED.PORT_0@ + cpu_core@FP_ARITH_DISPATCHED.PORT_1@ + cpu_core@FP_ARITH_DISPATCHED.PORT_5@) / (2 * tma_info_core_core_clks)", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_core_fp_arith_utilization", @@ -1769,7 +1770,6 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_retire", @@ -2002,6 +2002,7 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -2375,6 +2376,7 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -2405,6 +2407,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json index 0f1628d698da..06e67e34e1bf 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -466,6 +466,7 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -682,6 +683,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json index 8fcc05c4e0a1..a6eed0d9a26d 
100644 --- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json @@ -85,6 +85,7 @@ }, { "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_4k_aliasing", @@ -319,7 +320,6 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -464,6 +464,7 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -497,6 +498,7 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / 
(tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -574,14 +576,12 @@ }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks", "MetricGroup": "Flops;Ret", "MetricName": "tma_info_core_flopc" }, { "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_core_fp_arith_utilization", @@ -927,7 +927,6 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_retire" @@ -1100,6 +1099,7 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1419,6 +1419,7 @@ }, { "BriefDescription": "This 
metric represents rate of split store accesses", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1446,6 +1447,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index 9bb7e3f20f7f..7082ad5ba961 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -289,6 +289,7 @@ }, { "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_4k_aliasing", @@ -523,7 +524,6 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -668,6 +668,7 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / 
(tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -701,6 +702,7 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -778,14 +780,12 @@ }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks", "MetricGroup": "Flops;Ret", "MetricName": "tma_info_core_flopc" }, { "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * 
tma_info_core_core_clks)", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_core_fp_arith_utilization", @@ -1144,7 +1144,6 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_retire" @@ -1369,6 +1368,7 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1715,6 +1715,7 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1742,6 +1743,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json index 1bb9cededa56..a0191c8b708d 100644 --- a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json @@ -85,6 +85,7 @@ }, { 
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_4k_aliasing", @@ -319,7 +320,6 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -464,6 +464,7 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -497,6 +498,7 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", 
"MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -574,14 +576,12 @@ }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks", "MetricGroup": "Flops;Ret", "MetricName": "tma_info_core_flopc" }, { "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_core_fp_arith_utilization", @@ -933,7 +933,6 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_retire" @@ -1126,6 +1125,7 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1445,6 +1445,7 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / 
tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1472,6 +1473,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index c207c851a9f9..222212abd811 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -553,7 +553,6 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -717,6 +716,7 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -750,6 +750,7 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + 
tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -827,14 +828,12 @@ }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16", "MetricGroup": "Flops;Ret", "MetricName": "tma_info_core_flopc" }, { "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_core_clks)", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_core_fp_arith_utilization", @@ -1216,7 +1215,6 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_thread_slots / 
cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_retire" @@ -1467,6 +1465,7 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1841,6 +1840,7 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1868,6 +1868,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json index c7c2d6ab1a93..fab084e1bc69 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json @@ -79,6 +79,7 @@ }, { "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": 
"tma_4k_aliasing", @@ -313,7 +314,6 @@ }, { "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector", "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_fp_arith", @@ -458,6 +458,7 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -491,6 +492,7 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -568,14 +570,12 @@ }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * 
cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks", "MetricGroup": "Flops;Ret", "MetricName": "tma_info_core_flopc" }, { "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)", "MetricGroup": "Cor;Flops;HPC", "MetricName": "tma_info_core_fp_arith_utilization", @@ -927,7 +927,6 @@ }, { "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_retire" @@ -1114,6 +1113,7 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1433,6 +1433,7 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1460,6 +1461,7 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in 
program order) overlapping stores", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", -- cgit v1.2.3-70-g09d2 From a7789d3f2e96ac1056f127d529f9c35e3ce88479 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 2 Aug 2023 10:33:42 -0300 Subject: perf python: Cope with declarations after statements found in Python.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With -Werror the build was failing on fedora rawhide: [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ gcc -v Using built-in specs. COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/13/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-redhat-linux Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,fortran,objc,obj-c++,ada,go,d,m2,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only --enable-libstdcxx-backtrace --with-libstdcxx-zoneinfo=/usr/share/zoneinfo --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --with-isl=/builddir/build/BUILD/gcc-13.2.1-20230728/obj-x86_64-redhat-linux/isl-install --enable-offload-targets=nvptx-none --without-cuda-driver --enable-offload-defaulted --enable-gnu-indirect-function --enable-cet --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux --with-build-config=bootstrap-lto --enable-link-serialization=1 Thread model: posix Supported LTO compression algorithms: zlib zstd gcc version 13.2.1 20230728 (Red Hat 13.2.1-1) (GCC) [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ In 
file included from /usr/include/python3.12/Python.h:44, from /git/perf-6.5.0-rc2/tools/perf/util/python.c:2: /usr/include/python3.12/object.h: In function ‘Py_SIZE’: /usr/include/python3.12/object.h:217:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 217 | PyVarObject *var_ob = _PyVarObject_CAST(ob); | ^~~~~~~~~~~ LD /tmp/build/perf/arch/perf-in.o In file included from /usr/include/python3.12/Python.h:53: /usr/include/python3.12/cpython/longintrepr.h: In function ‘_PyLong_CompactValue’: /usr/include/python3.12/cpython/longintrepr.h:121:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 121 | Py_ssize_t sign = 1 - (op->long_value.lv_tag & _PyLong_SIGN_MASK); | ^~~~~~~~~~ So add -Wno-declaration-after-statement to the python binding CFLAGS. Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZMpcTMvnQns81YWA@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 869738fc06c3..79d5e2955f85 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -66,6 +66,9 @@ if cc_is_clang: else: cflags += ['-Wno-cast-function-type' ] +# The python headers have mixed code with declarations (decls after asserts, for instance) +cflags += [ "-Wno-declaration-after-statement" ] + src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') -- cgit v1.2.3-70-g09d2 From c43888e739bbf184eb95018188215a5487cc0b15 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 2 Aug 2023 10:33:42 -0300 Subject: perf script python: Cope with declarations after statements found in Python.h With -Werror the build was failing on fedora rawhide: [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ gcc -v Using built-in specs. 
COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/13/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-redhat-linux Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,fortran,objc,obj-c++,ada,go,d,m2,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only --enable-libstdcxx-backtrace --with-libstdcxx-zoneinfo=/usr/share/zoneinfo --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --with-isl=/builddir/build/BUILD/gcc-13.2.1-20230728/obj-x86_64-redhat-linux/isl-install --enable-offload-targets=nvptx-none --without-cuda-driver --enable-offload-defaulted --enable-gnu-indirect-function --enable-cet --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux --with-build-config=bootstrap-lto --enable-link-serialization=1 Thread model: posix Supported LTO compression algorithms: zlib zstd gcc version 13.2.1 20230728 (Red Hat 13.2.1-1) (GCC) [perfbuilder@27cfe44d67ed perf-6.5.0-rc2]$ In file included from /usr/include/python3.12/Python.h:44, from scripts/python/Perf-Trace-Util/Context.c:14: /usr/include/python3.12/object.h: In function 'Py_SIZE': /usr/include/python3.12/object.h:217:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 217 | PyVarObject *var_ob = _PyVarObject_CAST(ob); | ^~~~~~~~~~~ In file included from /usr/include/python3.12/Python.h:53: /usr/include/python3.12/cpython/longintrepr.h: In function '_PyLong_CompactValue': /usr/include/python3.12/cpython/longintrepr.h:121:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 121 | Py_ssize_t sign = 1 - (op->long_value.lv_tag & 
_PyLong_SIGN_MASK); | ^~~~~~~~~~ In file included from /usr/include/python3.12/Python.h:44, from util/scripting-engines/trace-event-python.c:22: /usr/include/python3.12/object.h: In function 'Py_SIZE': /usr/include/python3.12/object.h:217:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 217 | PyVarObject *var_ob = _PyVarObject_CAST(ob); | ^~~~~~~~~~~ CC /tmp/build/perf/util/units.o CC /tmp/build/perf/util/time-utils.o In file included from /usr/include/python3.12/Python.h:53: /usr/include/python3.12/cpython/longintrepr.h: In function '_PyLong_CompactValue': /usr/include/python3.12/cpython/longintrepr.h:121:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 121 | Py_ssize_t sign = 1 - (op->long_value.lv_tag & _PyLong_SIGN_MASK); | ^~~~~~~~~~ So add -Wno-declaration-after-statement to the python scripting CFLAGS. Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZMpdKeO8gU%2FcWDqH@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/Perf-Trace-Util/Build | 3 ++- tools/perf/util/scripting-engines/Build | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build index 7d0e33ce6aba..5b0b5ff7e14a 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Build +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -1,3 +1,4 @@ perf-y += Context.o -CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs +# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance) +CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-declaration-after-statement diff --git 
a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index c220fec97032..586b94e90f4e 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -5,4 +5,5 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum -CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum +# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance) +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum -Wno-declaration-after-statement -- cgit v1.2.3-70-g09d2 From 4e95ed4f4d5bc6838a10e6952999b41b1d07e56f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 25 Jul 2023 17:03:46 +0200 Subject: perf build: Update feature check for clang and llvm Perf build auto-detects features and packages already installed for its build. This is done in directory tools/build/feature. This directory contains small sample programs. When they successfully compile the necessary prereqs in form of libraries and header files are present. Such a check is also done for llvm and clang. And the checks fail. 
Fix this and update to the latest C++ standard and use the new library provided by clang (which contains new packaging) see this link for reference: https://fedoraproject.org/wiki/Changes/Stop-Shipping-Individual-Component-Libraries-In-clang-lib-Package Output before: # rm -f ./test-clang.bin; make test-clang.bin; ./test-clang.bin; \ ll test-clang.make.output g++ -MD -Wall -Werror -o test-clang.bin test-clang.cpp \ > test-clang.make.output 2>&1 -std=gnu++14 \ -I/usr/include \ -L/usr/lib64 \ -Wl,--start-group -lclangBasic -lclangDriver \ -lclangFrontend -lclangEdit -lclangLex \ -lclangAST -Wl,--end-group \ -lLLVM-16 \ \ > test-clang.make.output 2>&1 make: *** [Makefile:356: test-clang.bin] Error 1 -bash: ./test-clang.bin: No such file or directory -rw-r--r--. 1 root root 252041 Jul 12 09:56 test-clang.make.output # File test-clang.make.output contains many lines of unreferenced symbols. Output after: # rm -f ./test-clang.bin; make test-clang.bin; ./test-clang.bin; \ cat test-clang.make.output g++ -MD -Wall -Werror -o test-clang.bin test-clang.cpp \ > test-clang.make.output 2>&1 -std=gnu++17 \ -I/usr/include \ -L/usr/lib64 \ -Wl,--start-group -lclang-cpp -Wl,--end-group \ -lLLVM-16 \ \ > test-clang.make.output 2>&1 # Committer notes: Test it in the tools/build/feature directory, and have clang-devel and llvm-devel installed. 
Signed-off-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Ian Rogers Cc: Jiri Olsa Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Wang Nan Link: https://lore.kernel.org/r/20230725150347.3479291-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 2cd6dbbee088..3184f387990a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -340,7 +340,7 @@ $(OUTPUT)test-jvmti-cmlr.bin: $(BUILD) $(OUTPUT)test-llvm.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++17 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ $(shell $(LLVM_CONFIG) --libs Core BPF) \ @@ -348,17 +348,15 @@ $(OUTPUT)test-llvm.bin: > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-llvm-version.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++17 \ -I$(shell $(LLVM_CONFIG) --includedir) \ > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-clang.bin: - $(BUILDXX) -std=gnu++14 \ + $(BUILDXX) -std=gnu++17 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ - -Wl,--start-group -lclangBasic -lclangDriver \ - -lclangFrontend -lclangEdit -lclangLex \ - -lclangAST -Wl,--end-group \ + -Wl,--start-group -lclang-cpp -Wl,--end-group \ $(shell $(LLVM_CONFIG) --libs Core option) \ $(shell $(LLVM_CONFIG) --system-libs) \ > $(@:.bin=.make.output) 2>&1 -- cgit v1.2.3-70-g09d2 From 8fcaea9fd0dabc33f1a18f62aa3cf3d12286cd9f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 25 Jul 2023 17:03:47 +0200 Subject: perf build: Support llvm and clang support compiled in Perf build supports llvm and clang support compiled in. Test case 56 builtin clang support provides a test case which is always skipped. Link perf with the latest llvm and clang libraries and enable this test case. 
Use 'make LIBCLANGLLVM=1' to include this support. V2: Add Library path before -lclang-cpp Output before: # ./perf test 56 56: builtin clang support : 56.1: builtin clang compile C source to IR : Skip (not compiled in) 56.2: builtin clang compile C source to ELF object: Skip (not compiled in) Output after: # ./perf test 56 56: builtin clang support : 56.1: builtin clang compile C source to IR : Ok 56.2: builtin clang compile C source to ELF object : Ok # From Ian Rogers: Build tested with LLVM 14 and 15 using: BUILD_BPF_SKEL=1 LIBCLANGLLVM=1 LLVM_CONFIG=llvm-config-14 BUILD_BPF_SKEL=1 LIBCLANGLLVM=1 LLVM_CONFIG=llvm-config-15 Signed-off-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Tested-by: Ian Rogers Cc: Heiko Carstens Cc: Jiri Olsa Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Cc: Wang Nan Link: https://lore.kernel.org/r/20230725150347.3479291-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/Makefile.perf | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index fe7afe6d8529..1bf8dc53641f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -331,7 +331,7 @@ CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra CORE_CFLAGS += -std=gnu11 -CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti +CXXFLAGS += -std=gnu++17 -fno-exceptions -fno-rtti CXXFLAGS += -Wall CXXFLAGS += -Wextra CXXFLAGS += -fno-omit-frame-pointer diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a44d16ec11ee..0ed7ee0c1665 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -426,10 +426,7 @@ EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS)) LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group ifeq ($(USE_CLANG), 1) - CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema 
Analysis Parse Serialization - CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l)) - LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so)) - LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group + LIBS += -L$(shell $(LLVM_CONFIG) --libdir) -lclang-cpp endif ifeq ($(USE_LLVM), 1) -- cgit v1.2.3-70-g09d2 From 979e9c9fc9c2a761303585e07fe2699bdd88182f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 2 Aug 2023 18:22:14 -0300 Subject: perf annotate bpf: Don't enclose non-debug code with an assert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 616b14b47a86d880 ("perf build: Conditionally define NDEBUG") we started using NDEBUG=1 when DEBUG=1 isn't present, so code that is enclosed with assert() is not called. In dd317df072071903 ("perf build: Make binutil libraries opt in") we stopped linking against binutils-devel, for licensing reasons. Recently people asked me why annotation of BPF programs wasn't working, i.e. this: $ perf annotate bpf_prog_5280546344e3f45c_kfree_skb was returning: case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); This was on a fedora rpm, so its new enough that I had to try to test by rebuilding using BUILD_NONDISTRO=1, only to get it segfaulting on me. This combination made this libopcode function not to be called: assert(bfd_check_format(bfdf, bfd_object)); Changing it to: if (!bfd_check_format(bfdf, bfd_object)) abort(); Made it work, looking at this "check" function made me realize it changes the 'bfdf' internal state, i.e. we better call it. So stop using assert() on it, just call it and abort if it fails. Probably it is better to propagate the error, etc, but it seems it is unlikely to fail from the usage done so far and we really need to stop using libopcodes, so do the quick fix above and move on. 
With it we have BPF annotation back working when built with BUILD_NONDISTRO=1: ⬢[acme@toolbox perf-tools-next]$ perf annotate --stdio2 bpf_prog_5280546344e3f45c_kfree_skb | head No kallsyms or vmlinux with build-id 939bc71a1a51cdc434e60af93c7e734f7d5c0e7e was found Samples: 12 of event 'cpu-clock:ppp', 4000 Hz, Event count (approx.): 3000000, [percent: local period] bpf_prog_5280546344e3f45c_kfree_skb() bpf_prog_5280546344e3f45c_kfree_skb Percent int kfree_skb(struct trace_event_raw_kfree_skb *args) { nop 33.33 xchg %ax,%ax push %rbp mov %rsp,%rbp sub $0x180,%rsp push %rbx push %r13 ⬢[acme@toolbox perf-tools-next]$ Fixes: 6987561c9e86eace ("perf annotate: Enable annotation of BPF programs") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Mohamed Mahmoud Cc: Namhyung Kim Cc: Dave Tucker Cc: Derek Barbosa Cc: Song Liu Link: https://lore.kernel.org/lkml/ZMrMzoQBe0yqMek1@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ba988a13dacb..82956adf9963 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1846,8 +1846,11 @@ static int symbol__disassemble_bpf(struct symbol *sym, perf_exe(tpath, sizeof(tpath)); bfdf = bfd_openr(tpath, NULL); - assert(bfdf); - assert(bfd_check_format(bfdf, bfd_object)); + if (bfdf == NULL) + abort(); + + if (!bfd_check_format(bfdf, bfd_object)) + abort(); s = open_memstream(&buf, &buf_size); if (!s) { @@ -1895,7 +1898,8 @@ static int symbol__disassemble_bpf(struct symbol *sym, #else disassemble = disassembler(bfdf); #endif - assert(disassemble); + if (disassemble == NULL) + abort(); fflush(s); do { -- cgit v1.2.3-70-g09d2 From e2cabf2a44791f01c21f8d5189b946926e34142e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 31 Jul 2023 02:49:32 -0700 Subject: perf hists browser: Fix hierarchy mode header The commit ef9ff6017e3c4593 ("perf ui browser: 
Move the extra title lines from the hists browser") introduced ui_browser__gotorc_title() to help moving non-title lines easily. But it failed to update the title for the hierarchy mode so it won't print the header line in the TUI at all. $ perf report --hierarchy Fixes: ef9ff6017e3c4593 ("perf ui browser: Move the extra title lines from the hists browser") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230731094934.1616495-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index c7ad9e003080..d8b88f10a48d 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1779,7 +1779,7 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser) hists_browser__scnprintf_hierarchy_headers(browser, headers, sizeof(headers)); - ui_browser__gotorc(&browser->b, 0, 0); + ui_browser__gotorc_title(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); } -- cgit v1.2.3-70-g09d2 From f6b8436bede3e80226e8b2100279c4450c73806a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 31 Jul 2023 02:49:33 -0700 Subject: perf hists browser: Fix the number of entries for 'e' key The 'e' key is to toggle expand/collapse the selected entry only. But the current code has a bug that it only increases the number of entries by 1 in the hierarchy mode so users cannot move under the current entry after the key stroke. This is due to a wrong assumption in the hist_entry__set_folding(). 
The commit b33f922651011eff ("perf hists browser: Put hist_entry folding logic into single function") factored out the code, but actually it should be handled separately. The hist_browser__set_folding() is to update fold state for each entry so it needs to traverse all (child) entries regardless of the current fold state. So it increases the number of entries by 1. But the hist_entry__set_folding() only cares the currently selected entry and its all children. So it should count all unfolded child entries. This code is implemented in hist_browser__toggle_fold() already so we can just call it. Fixes: b33f922651011eff ("perf hists browser: Put hist_entry folding logic into single function") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230731094934.1616495-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 58 +++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index d8b88f10a48d..70db5a717905 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -407,11 +407,6 @@ static bool hist_browser__selection_has_children(struct hist_browser *browser) return container_of(ms, struct callchain_list, ms)->has_children; } -static bool hist_browser__he_selection_unfolded(struct hist_browser *browser) -{ - return browser->he_selection ? 
browser->he_selection->unfolded : false; -} - static bool hist_browser__selection_unfolded(struct hist_browser *browser) { struct hist_entry *he = browser->he_selection; @@ -584,8 +579,8 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, return n; } -static void __hist_entry__set_folding(struct hist_entry *he, - struct hist_browser *hb, bool unfold) +static void hist_entry__set_folding(struct hist_entry *he, + struct hist_browser *hb, bool unfold) { hist_entry__init_have_children(he); he->unfolded = unfold ? he->has_children : false; @@ -603,34 +598,12 @@ static void __hist_entry__set_folding(struct hist_entry *he, he->nr_rows = 0; } -static void hist_entry__set_folding(struct hist_entry *he, - struct hist_browser *browser, bool unfold) -{ - double percent; - - percent = hist_entry__get_percent_limit(he); - if (he->filtered || percent < browser->min_pcnt) - return; - - __hist_entry__set_folding(he, browser, unfold); - - if (!he->depth || unfold) - browser->nr_hierarchy_entries++; - if (he->leaf) - browser->nr_callchain_rows += he->nr_rows; - else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { - browser->nr_hierarchy_entries++; - he->has_no_entry = true; - he->nr_rows = 1; - } else - he->has_no_entry = false; -} - static void __hist_browser__set_folding(struct hist_browser *browser, bool unfold) { struct rb_node *nd; struct hist_entry *he; + double percent; nd = rb_first_cached(&browser->hists->entries); while (nd) { @@ -640,6 +613,21 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); hist_entry__set_folding(he, browser, unfold); + + percent = hist_entry__get_percent_limit(he); + if (he->filtered || percent < browser->min_pcnt) + continue; + + if (!he->depth || unfold) + browser->nr_hierarchy_entries++; + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else if (unfold && !hist_entry__has_hierarchy_children(he, 
browser->min_pcnt)) { + browser->nr_hierarchy_entries++; + he->has_no_entry = true; + he->nr_rows = 1; + } else + he->has_no_entry = false; } } @@ -659,8 +647,10 @@ static void hist_browser__set_folding_selected(struct hist_browser *browser, boo if (!browser->he_selection) return; - hist_entry__set_folding(browser->he_selection, browser, unfold); - browser->b.nr_entries = hist_browser__nr_entries(browser); + if (unfold == browser->he_selection->unfolded) + return; + + hist_browser__toggle_fold(browser); } static void ui_browser__warn_lost_events(struct ui_browser *browser) @@ -732,8 +722,8 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l hist_browser__set_folding(browser, true); break; case 'e': - /* Expand the selected entry. */ - hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser)); + /* Toggle expand/collapse the selected entry. */ + hist_browser__toggle_fold(browser); break; case 'H': browser->show_headers = !browser->show_headers; -- cgit v1.2.3-70-g09d2 From 7bc0153c53bc1ef96e5d1ffb039d9070a944966b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2023 13:18:38 -0300 Subject: perf probe: Free string returned by synthesize_perf_probe_point() on failure to add a probe Building perf with EXTRA_CFLAGS="-fsanitize=address" a leak is detected when trying to add a probe to a non-existent function: # perf probe -x ~/bin/perf dso__neW Probe point 'dso__neW' not found. Error: Failed to add events. 
================================================================= ==296634==ERROR: LeakSanitizer: detected memory leaks Direct leak of 128 byte(s) in 1 object(s) allocated from: #0 0x7f67642ba097 in calloc (/lib64/libasan.so.8+0xba097) #1 0x7f67641a76f1 in allocate_cfi (/lib64/libdw.so.1+0x3f6f1) Direct leak of 65 byte(s) in 1 object(s) allocated from: #0 0x7f67642b95b5 in __interceptor_realloc.part.0 (/lib64/libasan.so.8+0xb95b5) #1 0x6cac75 in strbuf_grow util/strbuf.c:64 #2 0x6ca934 in strbuf_init util/strbuf.c:25 #3 0x9337d2 in synthesize_perf_probe_point util/probe-event.c:2018 #4 0x92be51 in try_to_find_probe_trace_events util/probe-event.c:964 #5 0x93d5c6 in convert_to_probe_trace_events util/probe-event.c:3512 #6 0x93d6d5 in convert_perf_probe_events util/probe-event.c:3529 #7 0x56f37f in perf_add_probe_events /var/home/acme/git/perf-tools-next/tools/perf/builtin-probe.c:354 #8 0x572fbc in __cmd_probe /var/home/acme/git/perf-tools-next/tools/perf/builtin-probe.c:738 #9 0x5730f2 in cmd_probe /var/home/acme/git/perf-tools-next/tools/perf/builtin-probe.c:766 #10 0x635d81 in run_builtin /var/home/acme/git/perf-tools-next/tools/perf/perf.c:323 #11 0x6362c1 in handle_internal_command /var/home/acme/git/perf-tools-next/tools/perf/perf.c:377 #12 0x63667a in run_argv /var/home/acme/git/perf-tools-next/tools/perf/perf.c:421 #13 0x636b8d in main /var/home/acme/git/perf-tools-next/tools/perf/perf.c:537 #14 0x7f676302950f in __libc_start_call_main (/lib64/libc.so.6+0x2950f) SUMMARY: AddressSanitizer: 193 byte(s) leaked in 2 allocation(s). # synthesize_perf_probe_point() returns a "detached" strbuf, i.e. a malloc'ed string that needs to be free'd. An audit will be performed to find other such cases. 
Acked-by: Masami Hiramatsu Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZM0l1Oxamr4SVjfY@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 2d056f02ae40..c7bfeab610a3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -961,8 +961,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, debuginfo__delete(dinfo); if (ntevs == 0) { /* No error but failed to find probe point. */ - pr_warning("Probe point '%s' not found.\n", - synthesize_perf_probe_point(&pev->point)); + char *probe_point = synthesize_perf_probe_point(&pev->point); + pr_warning("Probe point '%s' not found.\n", probe_point); + free(probe_point); return -ENODEV; } else if (ntevs < 0) { /* Error path : ntevs < 0 */ -- cgit v1.2.3-70-g09d2 From a612bbf8b8fdd29136a1ac8b2afca3780278d344 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2023 13:18:38 -0300 Subject: perf probe: Free string returned by synthesize_perf_probe_point() on failure in synthesize_perf_probe_command() Building perf with EXTRA_CFLAGS="-fsanitize=address" a leak was detected elsewhere and led to an audit, where we found that synthesize_perf_probe_command() may leak the string returned by synthesize_perf_probe_point() on failure; fix it. 
Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZM0mzpQktHnhXJXr@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c7bfeab610a3..2835d87cb977 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2063,14 +2063,18 @@ char *synthesize_perf_probe_command(struct perf_probe_event *pev) goto out; tmp = synthesize_perf_probe_point(&pev->point); - if (!tmp || strbuf_addstr(&buf, tmp) < 0) + if (!tmp || strbuf_addstr(&buf, tmp) < 0) { + free(tmp); goto out; + } free(tmp); for (i = 0; i < pev->nargs; i++) { tmp = synthesize_perf_probe_arg(pev->args + i); - if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0) + if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0) { + free(tmp); goto out; + } free(tmp); } -- cgit v1.2.3-70-g09d2 From aeb50d3f2cd6255b7d065a91ed6a01784b208e6f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2023 13:30:06 -0300 Subject: perf probe: Make synthesize_perf_probe_point() private to probe-event.c Not used in any other place, so just make it static. 
Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZM0pjfOe6R4X%2Fcql@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 4 +++- tools/perf/util/probe-event.h | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 2835d87cb977..1a5b7fa459b2 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -53,6 +53,8 @@ bool probe_event_dry_run; /* Dry run flag */ struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM }; +static char *synthesize_perf_probe_point(struct perf_probe_point *pp); + #define semantic_error(msg ...) pr_err("Semantic error :" msg) int e_snprintf(char *str, size_t size, const char *format, ...) @@ -2010,7 +2012,7 @@ out: } /* Compose only probe point (not argument) */ -char *synthesize_perf_probe_point(struct perf_probe_point *pp) +static char *synthesize_perf_probe_point(struct perf_probe_point *pp) { struct strbuf buf; char *tmp, *ret = NULL; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 8ad5b1579f1d..7e3b6c3d1f74 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -137,7 +137,6 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev); char *synthesize_perf_probe_command(struct perf_probe_event *pev); char *synthesize_probe_trace_command(struct probe_trace_event *tev); char *synthesize_perf_probe_arg(struct perf_probe_arg *pa); -char *synthesize_perf_probe_point(struct perf_probe_point *pp); int perf_probe_event__copy(struct perf_probe_event *dst, struct perf_probe_event *src); -- cgit v1.2.3-70-g09d2 From 8c49c6e1a7b790c4cb9f464c5485117451d91c60 Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Mon, 17 Jul 2023 17:07:37 -0700 Subject: perf script: Print "cgroup" field on the same line as "comm" Commit 3fd7a168bf51 ("perf script: Add 
'cgroup' field for output") added support for printing cgroup path in perf script output. It was okay if you didn't want any stacks: $ sudo perf script --comms jpegtran:23f4bf -F comm,tid,cpu,time,cgroup jpegtran:23f4bf 3321915 [013] 404718.587488: /idle.slice/polish.service jpegtran:23f4bf 3321915 [031] 404718.592073: /idle.slice/polish.service With stacks it gets messier as cgroup is printed after the stack: $ perf script --comms jpegtran:23f4bf -F comm,tid,cpu,time,cgroup,ip,sym jpegtran:23f4bf 3321915 [013] 404718.587488: 5c554 compress_output 570d9 jpeg_finish_compress 3476e jpegtran_main 330ee jpegtran::main 326e2 core::ops::function::FnOnce::call_once (inlined) 326e2 std::sys_common::backtrace::__rust_begin_short_backtrace /idle.slice/polish.service jpegtran:23f4bf 3321915 [031] 404718.592073: 8474d jsimd_encode_mcu_AC_first_prepare_sse2.PADDING 55af68e62fff [unknown] /idle.slice/polish.service Let's instead print cgroup on the same line as comm: $ perf script --comms jpegtran:23f4bf -F comm,tid,cpu,time,cgroup,ip,sym jpegtran:23f4bf 3321915 [013] 404718.587488: /idle.slice/polish.service 5c554 compress_output 570d9 jpeg_finish_compress 3476e jpegtran_main 330ee jpegtran::main 326e2 core::ops::function::FnOnce::call_once (inlined) 326e2 std::sys_common::backtrace::__rust_begin_short_backtrace jpegtran:23f4bf 3321915 [031] 404718.592073: /idle.slice/polish.service 8474d jsimd_encode_mcu_AC_first_prepare_sse2.PADDING 55af68e62fff [unknown] Fixes: 3fd7a168bf514979 ("perf script: Add 'cgroup' field for output") Signed-off-by: Ivan Babrou Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: kernel-team@cloudflare.com Link: https://lore.kernel.org/r/20230718000737.49077-1-ivan@cloudflare.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-script.c 
b/tools/perf/builtin-script.c index 200b3e7ea8da..517bf25750c8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2199,6 +2199,17 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(RETIRE_LAT)) fprintf(fp, "%16" PRIu16, sample->retire_lat); + if (PRINT_FIELD(CGROUP)) { + const char *cgrp_name; + struct cgroup *cgrp = cgroup__find(machine->env, + sample->cgroup); + if (cgrp != NULL) + cgrp_name = cgrp->name; + else + cgrp_name = "unknown"; + fprintf(fp, " %s", cgrp_name); + } + if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; @@ -2243,17 +2254,6 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(CODE_PAGE_SIZE)) fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str)); - if (PRINT_FIELD(CGROUP)) { - const char *cgrp_name; - struct cgroup *cgrp = cgroup__find(machine->env, - sample->cgroup); - if (cgrp != NULL) - cgrp_name = cgrp->name; - else - cgrp_name = "unknown"; - fprintf(fp, " %s", cgrp_name); - } - perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); -- cgit v1.2.3-70-g09d2 From 878460e8d0ff84a0edbaff9d06f9d9dbe8353800 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 10 Aug 2023 11:46:20 -0300 Subject: perf build: Remove -Wno-unused-but-set-variable from the flex flags when building with clang < 13.0.0 clang < 13.0.0 doesn't grok -Wno-unused-but-set-variable, so just remove it to avoid: error: unknown warning option '-Wno-unused-but-set-variable'; did you mean '-Wno-unused-const-variable'? [-Werror,-Wunknown-warning-option] make[4]: *** [/git/perf-6.5.0-rc4/tools/build/Makefile.build:128: /tmp/build/perf/util/pmu-flex.o] Error 1 make[4]: *** Waiting for unfinished jobs.... 
Fixes: ddc8e4c966923ad1 ("perf build: Disable fewer bison warnings") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZNUSWr52jUnVaaa%2F@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d487aec0b458..9699e31ff4c0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,3 +1,4 @@ +include $(srctree)/tools/scripts/Makefile.include include $(srctree)/tools/scripts/utilities.mak perf-y += arm64-frame-pointer-unwind-support.o @@ -301,16 +302,21 @@ else flex_flags := -w endif -CFLAGS_parse-events-flex.o += $(flex_flags) -CFLAGS_pmu-flex.o += $(flex_flags) -CFLAGS_expr-flex.o += $(flex_flags) -CFLAGS_bpf-filter-flex.o += $(flex_flags) - # Some newer clang and gcc version complain about this # util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable] # int yynerrs = 0; bison_flags := -DYYENABLE_NLS=0 -Wno-unused-but-set-variable + +# Old clangs don't grok -Wno-unused-but-set-variable, remove it +ifeq ($(CC_NO_CLANG), 0) + CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g') + ifeq ($(call version-lt3,$(CLANG_VERSION),13.0.0),1) + bison_flags := $(subst -Wno-unused-but-set-variable,,$(bison_flags)) + flex_flags := $(subst -Wno-unused-but-set-variable,,$(flex_flags)) + endif +endif + BISON_GE_382 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 382) ifeq ($(BISON_GE_382),1) bison_flags += -Wno-switch-enum @@ -323,6 +329,11 @@ ifeq ($(BISON_LT_381),1) bison_flags += -DYYNOMEM=YYABORT endif +CFLAGS_parse-events-flex.o += $(flex_flags) +CFLAGS_pmu-flex.o += $(flex_flags) +CFLAGS_expr-flex.o += $(flex_flags) +CFLAGS_bpf-filter-flex.o 
+= $(flex_flags) + CFLAGS_parse-events-bison.o += $(bison_flags) CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) -- cgit v1.2.3-70-g09d2 From e59fea47f83e8a9ac5b772d140a0d67d50ba0ed8 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 11 Aug 2023 10:45:46 +0530 Subject: perf symbols: Fix DSO kernel load and symbol process to correctly map DSO to its long_name, type and adjust_symbols Test "object code reading" fails sometimes for kernel address as below: Reading object code for memory address: 0xc000000000004c3c File is: [kernel.kallsyms] On file address is: 0x14c3c dso__data_read_offset failed test child finished with -1 ---- end ---- Object code reading: FAILED! Here dso__data_read_offset() fails for symbol address 0xc000000000004c3c. This is because the DSO long_name here is "[kernel.kallsyms]" and hence open_dso() fails to open this file. There is an incorrect DSO to map handling here. The key points here are: - The DSO long_name is set to "[kernel.kallsyms]". This file is not present and hence returns error - The DSO binary type is set to DSO_BINARY_TYPE__NOT_FOUND - The DSO adjust_symbols member is set to zero In the end dso__data_read_offset() returns -1 and the address 0x14c3c can not be resolved. Hence the test fails. But the address actually maps to the kernel DSO # objdump -z -d --start-address=0xc000000000004c3c --stop-address=0xc000000000004cbc /home/athira/linux/vmlinux /home/athira/linux/vmlinux: file format elf64-powerpcle Disassembly of section .head.text: c000000000004c3c : c000000000004c3c: a6 02 9b 7d mfsrr1 r12 c000000000004c40: 78 13 42 7c mr r2,r2 c000000000004c44: 18 00 4d e9 ld r10,24(r13) c000000000004c48: 60 c6 4a 61 ori r10,r10,50784 c000000000004c4c: a6 03 49 7d mtctr r10 Fix dso__process_kernel_symbol() to set the binary_type and adjust_symbols members. 
dso->adjust_symbols is used by map__rip_2objdump() which converts the symbol start address to the objdump address. Also set dso->long_name in dso__load_vmlinux(). Suggested-by: Adrian Hunter Signed-off-by: Athira Rajeev Acked-by: Adrian Hunter Cc: Disha Goel Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230811051546.70039-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 ++ tools/perf/util/symbol.c | 15 ++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8bd466d1c2bd..95e99c332d7e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1440,6 +1440,8 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, curr_dso->kernel = dso->kernel; curr_dso->long_name = dso->long_name; curr_dso->long_name_len = dso->long_name_len; + curr_dso->binary_type = dso->binary_type; + curr_dso->adjust_symbols = dso->adjust_symbols; curr_map = map__new2(start, curr_dso); dso__put(curr_dso); if (curr_map == NULL) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f849f9ef68e6..3f36675b7c8f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2204,15 +2204,20 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) return -1; + /* + * dso__load_sym() may copy 'dso' which will result in the copies having + * an incorrect long name unless we set it here first. 
+ */ + dso__set_long_name(dso, vmlinux, vmlinux_allocated); + if (dso->kernel == DSO_SPACE__KERNEL_GUEST) + dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX; + else + dso->binary_type = DSO_BINARY_TYPE__VMLINUX; + err = dso__load_sym(dso, map, &ss, &ss, 0); symsrc__destroy(&ss); if (err > 0) { - if (dso->kernel == DSO_SPACE__KERNEL_GUEST) - dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX; - else - dso->binary_type = DSO_BINARY_TYPE__VMLINUX; - dso__set_long_name(dso, vmlinux, vmlinux_allocated); dso__set_loaded(dso); pr_debug("Using %s for symbols\n", symfs_vmlinux); } -- cgit v1.2.3-70-g09d2 From 33d9c5062113a4bd9c5f7414fdeccea3c58e6809 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Aug 2023 11:09:44 -0700 Subject: perf script python: Add stub for PMU symbol to the python binding Fix missing symbol seen in: ``` 19: 'import perf' in python : --- start --- test child forked, pid 2640936 python usage test: "echo "import sys ; sys.path.insert(0, 'python'); import perf" | '/usr/bin/python3' " Traceback (most recent call last): File "", line 1, in ImportError: tools/perf/python/perf.cpython-311-x86_64-linux-gnu.so: undefined symbol: perf_pmus__supports_extended_type test child finished with -1 ---- end ---- 'import perf' in python: FAILED! 
``` Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yang Jihong Link: https://lore.kernel.org/r/20230810180944.2794188-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 4eed8ec23994..c29f5f0bb552 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -113,6 +113,11 @@ bool evsel__is_aux_event(const struct evsel *evsel __maybe_unused) return false; } +bool perf_pmus__supports_extended_type(void) +{ + return false; +} + /* * Add this one here not to drag util/metricgroup.c */ -- cgit v1.2.3-70-g09d2 From 7777ac3dfe29f55dd0323d05a4cc81164fcfeb0e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Aug 2023 16:30:56 -0300 Subject: perf test trace+probe_vfs_getname.sh: Remove stray \ before / Running on fedora:38 in verbose mode I noticed: # perf test -v 117 grep: warning: stray \ before / 117: Check open filename arg using perf trace + vfs_getname : Remove that \ before /. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZNvTDsSMO3nw9Tnp@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index db2ff141f703..3697f054ce19 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -20,7 +20,7 @@ skip_if_no_perf_trace || exit 2 trace_open_vfs_getname() { evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? 
' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" perf trace -e $evts touch $file 2>&1 | \ - grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3-70-g09d2 From 6f769c3458b6cf2ddb3537c2a0b17463ead2af87 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Aug 2023 16:35:19 -0300 Subject: perf tests trace+probe_vfs_getname.sh: Accept quotes surrounding the filename With augmented_raw_syscalls transformed into a BPF skel made the output have a " around the filenames, which is not what the old perf probe vfs_getname method of obtaining filenames did, so accept the augmented way, with the quotes. At this point probably removing all the logic for the vfs_getname method is in order, will do it at some point. For now lets accept with/without quotes and make that test pass. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 3697f054ce19..4014487cf4d9 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -20,7 +20,7 @@ skip_if_no_perf_trace || exit 2 trace_open_vfs_getname() { evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? 
' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" perf trace -e $evts touch $file 2>&1 | \ - grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +\"?${file}\"?, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3-70-g09d2 From 56b11a2126bf2f422831ecf6112b87a4485b221b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 11 Aug 2023 12:19:48 -0300 Subject: perf bpf: Remove support for embedding clang for compiling BPF events (-e foo.c) This never was in the default build for perf, is difficult to maintain as it uses clang/llvm internals so ditch it, keeping, for now, the external compilation of .c BPF into .o bytecode and its subsequent loading, that is also going to be removed, do it separately to help bisection and to properly document what is being removed and why. 
Committer notes: Extracted from a larger patch and removed some leftovers, namely deleting these now unused feature tests: tools/build/feature/test-clang.cpp tools/build/feature/test-cxx.cpp tools/build/feature/test-llvm-version.cpp tools/build/feature/test-llvm.cpp Testing the use of BPF events after applying this patch: To use the external clang/llvm toolchain to compile a .c event and then use libbpf to load it, to get the syscalls:sys_enter_open* tracepoints and read the filename pointer, putting it into the ring buffer right after the usual tracepoint payload for 'perf trace' to then print it: [root@quaco ~]# perf trace -e /home/acme/git/perf-tools-next/tools/perf/examples/bpf/augmented_raw_syscalls.c,open* --max-events=10 0.000 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 0.083 abrt-dump-jour/1453 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 4 0.063 abrt-dump-jour/1454 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 4 0.082 abrt-dump-jour/1455 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 4 250.124 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 250.521 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/app.slice/memory.pressure", flags: RDONLY|CLOEXEC) = 12 251.047 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/app.slice/memory.current", flags: RDONLY|CLOEXEC) = 12 251.162 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/app.slice/memory.min", flags: RDONLY|CLOEXEC) = 12 251.242 systemd-oomd/959 openat(dfd: CWD, filename: 
"/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/app.slice/memory.low", flags: RDONLY|CLOEXEC) = 12 251.353 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/app.slice/memory.swap.current", flags: RDONLY|CLOEXEC) = 12 [root@quaco ~]# Same thing, but with a prebuilt .o BPF bytecode: [root@quaco ~]# perf trace -e /home/acme/git/perf-tools-next/tools/perf/examples/bpf/augmented_raw_syscalls.o,open* --max-events=10 0.000 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 0.083 abrt-dump-jour/1453 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 4 0.083 abrt-dump-jour/1455 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 4 0.062 abrt-dump-jour/1454 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 4 249.985 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 466.763 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj") = 13 467.145 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj") = 13 467.311 thermald/1234 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone2/temp") = 13 500.040 cgroupify/24006 openat(dfd: 4, filename: ".", flags: RDONLY|CLOEXEC|DIRECTORY|NONBLOCK) = 5 500.295 cgroupify/24006 openat(dfd: 4, filename: "24616/cgroup.procs") = 5 [root@quaco ~]# Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Anshuman Khandual Cc: Athira Rajeev Cc: Brendan Gregg Cc: Carsten Haitzler Cc: Eduard Zingerman Cc: Fangrui Song Cc: He Kuang Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: 
Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: "Naveen N. Rao" Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Tiezhu Yang Cc: Tom Rix Cc: Wang Nan Cc: Wang ShaoBo Cc: Yang Jihong Cc: Yonghong Song Cc: YueHaibing Link: https://lore.kernel.org/lkml/ZNZWsAXg2px1sm2h@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-clang.cpp | 28 ---- tools/build/feature/test-cxx.cpp | 16 --- tools/build/feature/test-llvm-version.cpp | 12 -- tools/build/feature/test-llvm.cpp | 14 -- tools/perf/Makefile.config | 31 ---- tools/perf/Makefile.perf | 17 --- tools/perf/tests/Build | 1 - tools/perf/tests/builtin-test.c | 1 - tools/perf/tests/clang.c | 32 ----- tools/perf/tests/make | 1 - tools/perf/util/Build | 2 - tools/perf/util/bpf-loader.c | 15 +- tools/perf/util/c++/Build | 5 - tools/perf/util/c++/clang-c.h | 43 ------ tools/perf/util/c++/clang-test.cpp | 67 --------- tools/perf/util/c++/clang.cpp | 225 ------------------------------ tools/perf/util/c++/clang.h | 27 ---- 17 files changed, 4 insertions(+), 533 deletions(-) delete mode 100644 tools/build/feature/test-clang.cpp delete mode 100644 tools/build/feature/test-cxx.cpp delete mode 100644 tools/build/feature/test-llvm-version.cpp delete mode 100644 tools/build/feature/test-llvm.cpp delete mode 100644 tools/perf/tests/clang.c delete mode 100644 tools/perf/util/c++/Build delete mode 100644 tools/perf/util/c++/clang-c.h delete mode 100644 tools/perf/util/c++/clang-test.cpp delete mode 100644 tools/perf/util/c++/clang.cpp delete mode 100644 tools/perf/util/c++/clang.h diff --git a/tools/build/feature/test-clang.cpp b/tools/build/feature/test-clang.cpp deleted file mode 100644 index 7d87075cd1c5..000000000000 --- a/tools/build/feature/test-clang.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "clang/Basic/Version.h" -#if CLANG_VERSION_MAJOR < 8 -#include "clang/Basic/VirtualFileSystem.h" -#endif 
-#include "clang/Driver/Driver.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/Support/ManagedStatic.h" -#if CLANG_VERSION_MAJOR >= 8 -#include "llvm/Support/VirtualFileSystem.h" -#endif -#include "llvm/Support/raw_ostream.h" - -using namespace clang; -using namespace clang::driver; - -int main() -{ - IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - - DiagnosticsEngine Diags(DiagID, &*DiagOpts); - Driver TheDriver("test", "bpf-pc-linux", Diags); - - llvm::llvm_shutdown(); - return 0; -} diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp deleted file mode 100644 index 396aaedd2418..000000000000 --- a/tools/build/feature/test-cxx.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -static void print_str(std::string s) -{ - std::cout << s << std::endl; -} - -int main() -{ - std::string s("Hello World!"); - print_str(std::move(s)); - std::cout << "|" << s << "|" << std::endl; - return 0; -} diff --git a/tools/build/feature/test-llvm-version.cpp b/tools/build/feature/test-llvm-version.cpp deleted file mode 100644 index 8a091625446a..000000000000 --- a/tools/build/feature/test-llvm-version.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "llvm/Config/llvm-config.h" - -#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH) -#define pass int main() {printf("%x\n", NUM_VERSION); return 0;} - -#if NUM_VERSION >= 0x030900 -pass -#else -# error This LLVM is not tested yet. 
-#endif diff --git a/tools/build/feature/test-llvm.cpp b/tools/build/feature/test-llvm.cpp deleted file mode 100644 index 88a3d1bdd9f6..000000000000 --- a/tools/build/feature/test-llvm.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/raw_ostream.h" -#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH) - -#if NUM_VERSION < 0x030900 -# error "LLVM version too low" -#endif -int main() -{ - llvm::errs() << "Hello World!\n"; - llvm::llvm_shutdown(); - return 0; -} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 1bf8dc53641f..e0592ed4c10f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1127,37 +1127,6 @@ ifndef NO_JVMTI endif endif -USE_CXX = 0 -USE_CLANGLLVM = 0 -ifdef LIBCLANGLLVM - $(call feature_check,cxx) - ifneq ($(feature-cxx), 1) - msg := $(warning No g++ found, disable clang and llvm support. Please install g++) - else - $(call feature_check,llvm) - $(call feature_check,llvm-version) - ifneq ($(feature-llvm), 1) - msg := $(warning No suitable libLLVM found, disabling builtin clang and LLVM support. Please install llvm-dev(el) (>= 3.9.0)) - else - $(call feature_check,clang) - ifneq ($(feature-clang), 1) - msg := $(warning No suitable libclang found, disabling builtin clang and LLVM support. Please install libclang-dev(el) (>= 3.9.0)) - else - CFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT - CXXFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT -I$(shell $(LLVM_CONFIG) --includedir) - $(call detected,CONFIG_CXX) - $(call detected,CONFIG_CLANGLLVM) - USE_CXX = 1 - USE_LLVM = 1 - USE_CLANG = 1 - ifneq ($(feature-llvm-version),1) - msg := $(warning This version of LLVM is not tested. 
May cause build errors) - endif - endif - endif - endif -endif - ifndef NO_LIBPFM4 $(call feature_check,libpfm4) ifeq ($(feature-libpfm4), 1) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0ed7ee0c1665..5370d7bf123e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -99,10 +99,6 @@ include ../scripts/utilities.mak # Define NO_JVMTI_CMLR (debug only) if you do not want to process CMLR # data for java source lines. # -# Define LIBCLANGLLVM if you DO want builtin clang and llvm support. -# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if -# llvm-config is not in $PATH. -# # Define CORESIGHT if you DO WANT support for CoreSight trace decoding. # # Define NO_AIO if you do not want support of Posix AIO based trace @@ -425,19 +421,6 @@ endif EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS)) LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group -ifeq ($(USE_CLANG), 1) - LIBS += -L$(shell $(LLVM_CONFIG) --libdir) -lclang-cpp -endif - -ifeq ($(USE_LLVM), 1) - LIBLLVM = $(shell $(LLVM_CONFIG) --libs all) $(shell $(LLVM_CONFIG) --system-libs) - LIBS += -L$(shell $(LLVM_CONFIG) --libdir) $(LIBLLVM) -endif - -ifeq ($(USE_CXX), 1) - LIBS += -lstdc++ -endif - export INSTALL SHELL_PATH ### Build rules diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index fb9ac5dc4079..52df5923a8b9 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -51,7 +51,6 @@ perf-y += sdt.o perf-y += is_printable_array.o perf-y += bitmap.o perf-y += perf-hooks.o -perf-y += clang.o perf-y += unit_number__scnprintf.o perf-y += mem2node.o perf-y += maps.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 6accb5442a73..0f3691fd31c2 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -108,7 +108,6 @@ static struct test_suite *generic_tests[] = { &suite__is_printable_array, 
&suite__bitmap_print, &suite__perf_hooks, - &suite__clang, &suite__unit_number__scnprint, &suite__mem2node, &suite__time_utils, diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c deleted file mode 100644 index a7111005d5b9..000000000000 --- a/tools/perf/tests/clang.c +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "tests.h" -#include "c++/clang-c.h" -#include - -#ifndef HAVE_LIBCLANGLLVM_SUPPORT -static int test__clang_to_IR(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - return TEST_SKIP; -} - -static int test__clang_to_obj(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - return TEST_SKIP; -} -#endif - -static struct test_case clang_tests[] = { - TEST_CASE_REASON("builtin clang compile C source to IR", clang_to_IR, - "not compiled in"), - TEST_CASE_REASON("builtin clang compile C source to ELF object", - clang_to_obj, - "not compiled in"), - { .name = NULL, } -}; - -struct test_suite suite__clang = { - .desc = "builtin clang support", - .test_cases = clang_tests, -}; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 58cf96d762d0..ea4c341f5af1 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -95,7 +95,6 @@ make_with_babeltrace:= LIBBABELTRACE=1 make_with_coresight := CORESIGHT=1 make_no_sdt := NO_SDT=1 make_no_syscall_tbl := NO_SYSCALL_TABLE=1 -make_with_clangllvm := LIBCLANGLLVM=1 make_no_libpfm4 := NO_LIBPFM4=1 make_with_gtk2 := GTK2=1 make_refcnt_check := EXTRA_CFLAGS="-DREFCNT_CHECKING=1" diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 9699e31ff4c0..ff3b55c7ed43 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -232,8 +232,6 @@ perf-y += perf-hooks.o perf-$(CONFIG_LIBBPF) += bpf-event.o perf-$(CONFIG_LIBBPF) += bpf-utils.o -perf-$(CONFIG_CXX) += c++/ - perf-$(CONFIG_LIBPFM4) += pfm.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" diff --git a/tools/perf/util/bpf-loader.c 
b/tools/perf/util/bpf-loader.c index 50e42698cbb7..b54e42f17926 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -26,7 +26,6 @@ #include "strfilter.h" #include "util.h" #include "llvm-utils.h" -#include "c++/clang-c.h" #include "util/hashmap.h" #include "asm/bug.h" @@ -220,16 +219,10 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) void *obj_buf; size_t obj_buf_sz; - perf_clang__init(); - err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz); - perf_clang__cleanup(); - if (err) { - pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err); - err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); - if (err) - return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); - } else - pr_debug("bpf: successful builtin compilation\n"); + err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); + if (err) + return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); + obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build deleted file mode 100644 index 8610d032ac19..000000000000 --- a/tools/perf/util/c++/Build +++ /dev/null @@ -1,5 +0,0 @@ -perf-$(CONFIG_CLANGLLVM) += clang.o -perf-$(CONFIG_CLANGLLVM) += clang-test.o - -CXXFLAGS_clang.o += -Wno-unused-parameter -CXXFLAGS_clang-test.o += -Wno-unused-parameter diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h deleted file mode 100644 index d3731a876b6c..000000000000 --- a/tools/perf/util/c++/clang-c.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PERF_UTIL_CLANG_C_H -#define PERF_UTIL_CLANG_C_H - -#include /* for size_t */ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef HAVE_LIBCLANGLLVM_SUPPORT -extern void perf_clang__init(void); -extern void perf_clang__cleanup(void); - -struct test_suite; -extern int test__clang_to_IR(struct test_suite *test, int subtest); -extern int 
test__clang_to_obj(struct test_suite *test, int subtest); - -extern int perf_clang__compile_bpf(const char *filename, - void **p_obj_buf, - size_t *p_obj_buf_sz); -#else - -#include -#include /* for __maybe_unused */ - -static inline void perf_clang__init(void) { } -static inline void perf_clang__cleanup(void) { } - -static inline int -perf_clang__compile_bpf(const char *filename __maybe_unused, - void **p_obj_buf __maybe_unused, - size_t *p_obj_buf_sz __maybe_unused) -{ - return -ENOTSUP; -} - -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp deleted file mode 100644 index a4683ca53697..000000000000 --- a/tools/perf/util/c++/clang-test.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "clang.h" -#include "clang-c.h" -extern "C" { -#include "../util.h" -} -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" - -#include -#include - -class perf_clang_scope { -public: - explicit perf_clang_scope() {perf_clang__init();} - ~perf_clang_scope() {perf_clang__cleanup();} -}; - -static std::unique_ptr -__test__clang_to_IR(void) -{ - unsigned int kernel_version; - - if (fetch_kernel_version(&kernel_version, NULL, 0)) - return std::unique_ptr(nullptr); - - std::string cflag_kver("-DLINUX_VERSION_CODE=" + - std::to_string(kernel_version)); - - std::unique_ptr M = - perf::getModuleFromSource({cflag_kver.c_str()}, - "perf-test.c", - test_llvm__bpf_base_prog); - return M; -} - -extern "C" { -int test__clang_to_IR(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - perf_clang_scope _scope; - - auto M = __test__clang_to_IR(); - if (!M) - return -1; - for (llvm::Function& F : *M) - if (F.getName() == "bpf_func__SyS_epoll_pwait") - return 0; - return -1; -} - -int test__clang_to_obj(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - perf_clang_scope _scope; - - auto M = __test__clang_to_IR(); - if (!M) - return 
-1; - - auto Buffer = perf::getBPFObjectFromModule(&*M); - if (!Buffer) - return -1; - return 0; -} - -} diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp deleted file mode 100644 index 1aad7d6d34aa..000000000000 --- a/tools/perf/util/c++/clang.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * llvm C frontend for perf. Support dynamically compile C file - * - * Inspired by clang example code: - * http://llvm.org/svn/llvm-project/cfe/trunk/examples/clang-interpreter/main.cpp - * - * Copyright (C) 2016 Wang Nan - * Copyright (C) 2016 Huawei Inc. - */ - -#include "clang/Basic/Version.h" -#include "clang/CodeGen/CodeGenAction.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Module.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/ManagedStatic.h" -#if CLANG_VERSION_MAJOR >= 14 -#include "llvm/MC/TargetRegistry.h" -#else -#include "llvm/Support/TargetRegistry.h" -#endif -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include - -#include "clang.h" -#include "clang-c.h" - -namespace perf { - -static std::unique_ptr LLVMCtx; - -using namespace clang; - -static CompilerInvocation * -createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, - DiagnosticsEngine& Diags) -{ - llvm::opt::ArgStringList CCArgs { - "-cc1", - "-triple", "bpf-pc-linux", - "-fsyntax-only", - "-O2", - "-nostdsysteminc", - "-nobuiltininc", - "-vectorize-loops", - "-vectorize-slp", - "-Wno-unused-value", - "-Wno-pointer-sign", - "-x", "c"}; - - CCArgs.append(CFlags.begin(), CFlags.end()); - CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs -#if CLANG_VERSION_MAJOR >= 11 - ,/*BinaryName=*/nullptr -#endif - ); - 
- FrontendOptions& Opts = CI->getFrontendOpts(); - Opts.Inputs.clear(); - Opts.Inputs.emplace_back(Path, - FrontendOptions::getInputKindForExtension("c")); - return CI; -} - -static std::unique_ptr -getModuleFromSource(llvm::opt::ArgStringList CFlags, - StringRef Path, IntrusiveRefCntPtr VFS) -{ - CompilerInstance Clang; - Clang.createDiagnostics(); - -#if CLANG_VERSION_MAJOR < 9 - Clang.setVirtualFileSystem(&*VFS); -#else - Clang.createFileManager(&*VFS); -#endif - -#if CLANG_VERSION_MAJOR < 4 - IntrusiveRefCntPtr CI = - createCompilerInvocation(std::move(CFlags), Path, - Clang.getDiagnostics()); - Clang.setInvocation(&*CI); -#else - std::shared_ptr CI( - createCompilerInvocation(std::move(CFlags), Path, - Clang.getDiagnostics())); - Clang.setInvocation(CI); -#endif - - std::unique_ptr Act(new EmitLLVMOnlyAction(&*LLVMCtx)); - if (!Clang.ExecuteAction(*Act)) - return std::unique_ptr(nullptr); - - return Act->takeModule(); -} - -std::unique_ptr -getModuleFromSource(llvm::opt::ArgStringList CFlags, - StringRef Name, StringRef Content) -{ - using namespace vfs; - - llvm::IntrusiveRefCntPtr OverlayFS( - new OverlayFileSystem(getRealFileSystem())); - llvm::IntrusiveRefCntPtr MemFS( - new InMemoryFileSystem(true)); - - /* - * pushOverlay helps setting working dir for MemFS. Must call - * before addFile. 
- */ - OverlayFS->pushOverlay(MemFS); - MemFS->addFile(Twine(Name), 0, llvm::MemoryBuffer::getMemBuffer(Content)); - - return getModuleFromSource(std::move(CFlags), Name, OverlayFS); -} - -std::unique_ptr -getModuleFromSource(llvm::opt::ArgStringList CFlags, StringRef Path) -{ - IntrusiveRefCntPtr VFS(vfs::getRealFileSystem()); - return getModuleFromSource(std::move(CFlags), Path, VFS); -} - -std::unique_ptr> -getBPFObjectFromModule(llvm::Module *Module) -{ - using namespace llvm; - - std::string TargetTriple("bpf-pc-linux"); - std::string Error; - const Target* Target = TargetRegistry::lookupTarget(TargetTriple, Error); - if (!Target) { - llvm::errs() << Error; - return std::unique_ptr>(nullptr); - } - - llvm::TargetOptions Opt; - TargetMachine *TargetMachine = - Target->createTargetMachine(TargetTriple, - "generic", "", - Opt, Reloc::Static); - - Module->setDataLayout(TargetMachine->createDataLayout()); - Module->setTargetTriple(TargetTriple); - - std::unique_ptr> Buffer(new SmallVector()); - raw_svector_ostream ostream(*Buffer); - - legacy::PassManager PM; - bool NotAdded; - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream -#if CLANG_VERSION_MAJOR >= 7 - , /*DwoOut=*/nullptr -#endif -#if CLANG_VERSION_MAJOR < 10 - , TargetMachine::CGFT_ObjectFile -#else - , llvm::CGFT_ObjectFile -#endif - ); - if (NotAdded) { - llvm::errs() << "TargetMachine can't emit a file of this type\n"; - return std::unique_ptr>(nullptr); - } - PM.run(*Module); - - return Buffer; -} - -} - -extern "C" { -void perf_clang__init(void) -{ - perf::LLVMCtx.reset(new llvm::LLVMContext()); - LLVMInitializeBPFTargetInfo(); - LLVMInitializeBPFTarget(); - LLVMInitializeBPFTargetMC(); - LLVMInitializeBPFAsmPrinter(); -} - -void perf_clang__cleanup(void) -{ - perf::LLVMCtx.reset(nullptr); - llvm::llvm_shutdown(); -} - -int perf_clang__compile_bpf(const char *filename, - void **p_obj_buf, - size_t *p_obj_buf_sz) -{ - using namespace perf; - - if (!p_obj_buf || !p_obj_buf_sz) - return -EINVAL; - 
- llvm::opt::ArgStringList CFlags; - auto M = getModuleFromSource(std::move(CFlags), filename); - if (!M) - return -EINVAL; - auto O = getBPFObjectFromModule(&*M); - if (!O) - return -EINVAL; - - size_t size = O->size_in_bytes(); - void *buffer; - - buffer = malloc(size); - if (!buffer) - return -ENOMEM; - memcpy(buffer, O->data(), size); - *p_obj_buf = buffer; - *p_obj_buf_sz = size; - return 0; -} -} diff --git a/tools/perf/util/c++/clang.h b/tools/perf/util/c++/clang.h deleted file mode 100644 index 6ce33e22f23c..000000000000 --- a/tools/perf/util/c++/clang.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PERF_UTIL_CLANG_H -#define PERF_UTIL_CLANG_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Option/Option.h" -#include - -namespace perf { - -using namespace llvm; - -std::unique_ptr -getModuleFromSource(opt::ArgStringList CFlags, - StringRef Name, StringRef Content); - -std::unique_ptr -getModuleFromSource(opt::ArgStringList CFlags, - StringRef Path); - -std::unique_ptr> -getBPFObjectFromModule(llvm::Module *Module); - -} -#endif -- cgit v1.2.3-70-g09d2 From 3d6dfae889174340af94c7357c8bae018966c524 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 11 Aug 2023 15:26:11 -0300 Subject: perf parse-events: Remove BPF event support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New features like the BPF --filter support in perf record have made the BPF event functionality somewhat redundant. As shown by commit fcb027c1a4f6 ("perf tools: Revert enable indices setting syntax for BPF map") and commit 14e4b9f4289a ("perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility") the BPF event support hasn't been well maintained and it adds considerable complexity in areas like event parsing, not least as '/' is a separator for event modifiers as well as in paths. 
This patch removes support in the event parser for BPF events and then the associated functions are removed. This leads to the removal of whole source files like bpf-loader.c. Removing support means that augmented syscalls in perf trace is broken, this will be fixed in a later commit adding support using BPF skeletons. The removal of BPF events causes an unused label warning from flex generated code, so update build to ignore it: ``` util/parse-events-flex.c:2704:1: error: label ‘find_rule’ defined but not used [-Werror=unused-label] 2704 | find_rule: /* we branch to this label when backing up */ ``` Committer notes: Extracted from a larger patch that was also removing the support for linking with libllvm and libclang, that were an alternative to using an external clang execution to compile the .c event source code into BPF bytecode. Testing it: # perf trace -e /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c event syntax error: '/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c' \___ Bad event or PMU Unabled to find PMU or event on a PMU of 'home' Initial error: event syntax error: '/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c' \___ Cannot find PMU `home'. Missing kernel support? Run 'perf list' for a list of valid events Usage: perf trace [] [] or: perf trace [] -- [] or: perf trace record [] [] or: perf trace record [] -- [] -e, --event event/syscall selector. use 'perf list' to list available events # Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Anshuman Khandual Cc: Athira Rajeev Cc: Brendan Gregg Cc: Carsten Haitzler Cc: Eduard Zingerman Cc: Fangrui Song Cc: He Kuang Cc: Ingo Molnar Cc: James Clark Cc: Kan Liang Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Naveen N. 
Rao Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Tiezhu Yang Cc: Tom Rix Cc: Wang Nan Cc: Wang ShaoBo Cc: Yang Jihong Cc: Yonghong Song Cc: YueHaibing Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230810184853.2860737-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 33 - tools/perf/Documentation/perf-record.txt | 22 - tools/perf/Makefile.config | 12 - tools/perf/builtin-record.c | 45 - tools/perf/builtin-trace.c | 146 +- tools/perf/perf.c | 2 - tools/perf/tests/.gitignore | 5 - tools/perf/tests/Build | 30 - tools/perf/tests/bpf-script-example.c | 60 - tools/perf/tests/bpf-script-test-kbuild.c | 21 - tools/perf/tests/bpf-script-test-prologue.c | 49 - tools/perf/tests/bpf-script-test-relocation.c | 51 - tools/perf/tests/bpf.c | 390 ----- tools/perf/tests/builtin-test.c | 2 - tools/perf/tests/llvm.c | 219 --- tools/perf/tests/llvm.h | 31 - tools/perf/tests/tests.h | 2 - tools/perf/util/Build | 6 +- tools/perf/util/bpf-loader.c | 1999 ------------------------- tools/perf/util/bpf-loader.h | 216 --- tools/perf/util/config.c | 4 - tools/perf/util/llvm-utils.c | 612 -------- tools/perf/util/llvm-utils.h | 69 - tools/perf/util/parse-events.c | 268 ---- tools/perf/util/parse-events.h | 15 - tools/perf/util/parse-events.l | 31 - tools/perf/util/parse-events.y | 44 +- 27 files changed, 3 insertions(+), 4381 deletions(-) delete mode 100644 tools/perf/tests/.gitignore delete mode 100644 tools/perf/tests/bpf-script-example.c delete mode 100644 tools/perf/tests/bpf-script-test-kbuild.c delete mode 100644 tools/perf/tests/bpf-script-test-prologue.c delete mode 100644 tools/perf/tests/bpf-script-test-relocation.c delete mode 100644 tools/perf/tests/bpf.c delete mode 100644 tools/perf/tests/llvm.c delete mode 100644 tools/perf/tests/llvm.h delete mode 100644 tools/perf/util/bpf-loader.c delete mode 100644 
tools/perf/util/bpf-loader.h delete mode 100644 tools/perf/util/llvm-utils.c delete mode 100644 tools/perf/util/llvm-utils.h diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 1478068ad5dd..0b4e79dbd3f6 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -125,9 +125,6 @@ Given a $HOME/.perfconfig like this: group = true skip-empty = true - [llvm] - dump-obj = true - clang-opt = -g You can hide source code of annotate feature setting the config to false with @@ -657,36 +654,6 @@ ftrace.*:: -F option is not specified. Possible values are 'function' and 'function_graph'. -llvm.*:: - llvm.clang-path:: - Path to clang. If omit, search it from $PATH. - - llvm.clang-bpf-cmd-template:: - Cmdline template. Below lines show its default value. Environment - variable is used to pass options. - "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ - "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ - "-Wno-unused-value -Wno-pointer-sign " \ - "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" - - llvm.clang-opt:: - Options passed to clang. - - llvm.kbuild-dir:: - kbuild directory. If not set, use /lib/modules/`uname -r`/build. - If set to "" deliberately, skip kernel header auto-detector. - - llvm.kbuild-opts:: - Options passed to 'make' when detecting kernel header options. - - llvm.dump-obj:: - Enable perf dump BPF object files compiled by LLVM. - - llvm.opts:: - Options passed to llc. 
- samples.*:: samples.context:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 680396c56bd1..7d362407fb39 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -99,20 +99,6 @@ OPTIONS If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. - - a BPF source file (ending in .c) or a precompiled object file (ending - in .o) selects one or more BPF events. - The BPF program can attach to various perf events based on the ELF section - names. - - When processing a '.c' file, perf searches an installed LLVM to compile it - into an object file first. Optional clang options can be passed via the - '--clang-opt' command line option, e.g.: - - perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \ - -e tests/bpf-script-example.c - - Note: '--clang-opt' must be placed before '--event/-e'. - - a group of events surrounded by a pair of brace ("{event1,event2,...}"). Each event is separated by commas and the group should be quoted to prevent the shell interpretation. You also need to use --group on @@ -547,14 +533,6 @@ PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE) switch events will be enabled automatically, which can be suppressed by by the option --no-switch-events. ---clang-path=PATH:: -Path to clang binary to use for compiling BPF scriptlets. -(enabled when BPF support is on) - ---clang-opt=OPTIONS:: -Options passed to clang when compiling BPF scriptlets. -(enabled when BPF support is on) - --vmlinux=PATH:: Specify vmlinux path which has debuginfo. 
(enabled when BPF prologue is on) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e0592ed4c10f..d66b52407e19 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -589,18 +589,6 @@ ifndef NO_LIBELF LIBBPF_STATIC := 1 endif endif - - ifndef NO_DWARF - ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET - CFLAGS += -DHAVE_BPF_PROLOGUE - $(call detected,CONFIG_BPF_PROLOGUE) - else - msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset()); - endif - else - msg := $(warning DWARF support is off, BPF prologue is disabled); - endif - endif # NO_LIBBPF endif # NO_LIBELF diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index aec18db7ff23..34bb31f08bb5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -37,8 +37,6 @@ #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" #include "util/perf_api_probe.h" -#include "util/llvm-utils.h" -#include "util/bpf-loader.h" #include "util/trigger.h" #include "util/perf-hooks.h" #include "util/cpu-set-sched.h" @@ -2465,16 +2463,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } } - err = bpf__apply_obj_config(); - if (err) { - char errbuf[BUFSIZ]; - - bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); - pr_err("ERROR: Apply config to BPF failed: %s\n", - errbuf); - goto out_free_threads; - } - /* * Normally perf_session__new would do this, but it doesn't have the * evlist. 
@@ -3486,10 +3474,6 @@ static struct option __record_options[] = { "collect kernel callchains"), OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, "collect user callchains"), - OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", - "clang binary to use for compiling BPF scriptlets"), - OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", - "options passed to clang when compiling BPF scriptlets"), OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, @@ -3967,27 +3951,6 @@ int cmd_record(int argc, const char **argv) setlocale(LC_ALL, ""); -#ifndef HAVE_LIBBPF_SUPPORT -# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) - set_nobuild('\0', "clang-path", true); - set_nobuild('\0', "clang-opt", true); -# undef set_nobuild -#endif - -#ifndef HAVE_BPF_PROLOGUE -# if !defined (HAVE_DWARF_SUPPORT) -# define REASON "NO_DWARF=1" -# elif !defined (HAVE_LIBBPF_SUPPORT) -# define REASON "NO_LIBBPF=1" -# else -# define REASON "this architecture doesn't support BPF prologue" -# endif -# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) - set_nobuild('\0', "vmlinux", true); -# undef set_nobuild -# undef REASON -#endif - #ifndef HAVE_BPF_SKEL # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); @@ -4116,14 +4079,6 @@ int cmd_record(int argc, const char **argv) if (dry_run) goto out; - err = bpf__setup_stdout(rec->evlist); - if (err) { - bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); - pr_err("ERROR: Setup BPF stdout failed: %s\n", - errbuf); - goto out; - } - err = -ENOMEM; if (rec->no_buildid_cache || rec->no_buildid) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7ece2521efb6..59862467e781 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c 
@@ -18,6 +18,7 @@ #include #ifdef HAVE_LIBBPF_SUPPORT #include +#include #endif #include "util/bpf_map.h" #include "util/rlimit.h" @@ -53,7 +54,6 @@ #include "trace/beauty/beauty.h" #include "trace-event.h" #include "util/parse-events.h" -#include "util/bpf-loader.h" #include "util/tracepoint.h" #include "callchain.h" #include "print_binary.h" @@ -3287,17 +3287,6 @@ static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const ch return bpf_object__find_map_by_name(trace->bpf_obj, name); } -static void trace__set_bpf_map_filtered_pids(struct trace *trace) -{ - trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered"); -} - -static void trace__set_bpf_map_syscalls(struct trace *trace) -{ - trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter"); - trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit"); -} - static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { struct bpf_program *pos, *prog = NULL; @@ -3553,25 +3542,6 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) return err; } -static void trace__delete_augmented_syscalls(struct trace *trace) -{ - struct evsel *evsel, *tmp; - - evlist__remove(trace->evlist, trace->syscalls.events.augmented); - evsel__delete(trace->syscalls.events.augmented); - trace->syscalls.events.augmented = NULL; - - evlist__for_each_entry_safe(trace->evlist, tmp, evsel) { - if (evsel->bpf_obj == trace->bpf_obj) { - evlist__remove(trace->evlist, evsel); - evsel__delete(evsel); - } - - } - - bpf_object__close(trace->bpf_obj); - trace->bpf_obj = NULL; -} #else // HAVE_LIBBPF_SUPPORT static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused, const char *name __maybe_unused) @@ -3579,45 +3549,12 @@ static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_u return NULL; } -static void 
trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused) -{ -} - -static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused) -{ -} - -static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused, - const char *name __maybe_unused) -{ - return NULL; -} - static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused) { return 0; } - -static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused) -{ -} #endif // HAVE_LIBBPF_SUPPORT -static bool trace__only_augmented_syscalls_evsels(struct trace *trace) -{ - struct evsel *evsel; - - evlist__for_each_entry(trace->evlist, evsel) { - if (evsel == trace->syscalls.events.augmented || - evsel->bpf_obj == trace->bpf_obj) - continue; - - return false; - } - - return true; -} - static int trace__set_ev_qualifier_filter(struct trace *trace) { if (trace->syscalls.events.sys_enter) @@ -3981,16 +3918,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; - err = bpf__apply_obj_config(); - if (err) { - char errbuf[BUFSIZ]; - - bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); - pr_err("ERROR: Apply config to BPF failed: %s\n", - errbuf); - goto out_error_open; - } - err = trace__set_filter_pids(trace); if (err < 0) goto out_error_mem; @@ -4922,77 +4849,6 @@ int cmd_trace(int argc, const char **argv) "cgroup monitoring only available in system-wide mode"); } - evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__"); - if (IS_ERR(evsel)) { - bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf)); - pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf); - goto out; - } - - if (evsel) { - trace.syscalls.events.augmented = evsel; - - evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter"); - if (evsel == NULL) { - pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n"); - 
goto out; - } - - if (evsel->bpf_obj == NULL) { - pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n"); - goto out; - } - - trace.bpf_obj = evsel->bpf_obj; - - /* - * If we have _just_ the augmenter event but don't have a - * explicit --syscalls, then assume we want all strace-like - * syscalls: - */ - if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace)) - trace.trace_syscalls = true; - /* - * So, if we have a syscall augmenter, but trace_syscalls, aka - * strace-like syscall tracing is not set, then we need to trow - * away the augmenter, i.e. all the events that were created - * from that BPF object file. - * - * This is more to fix the current .perfconfig trace.add_events - * style of setting up the strace-like eBPF based syscall point - * payload augmenter. - * - * All this complexity will be avoided by adding an alternative - * to trace.add_events in the form of - * trace.bpf_augmented_syscalls, that will be only parsed if we - * need it. - * - * .perfconfig trace.add_events is still useful if we want, for - * instance, have msr_write.msr in some .perfconfig profile based - * 'perf trace --config determinism.profile' mode, where for some - * particular goal/workload type we want a set of events and - * output mode (with timings, etc) instead of having to add - * all via the command line. - * - * Also --config to specify an alternate .perfconfig file needs - * to be implemented. 
- */ - if (!trace.trace_syscalls) { - trace__delete_augmented_syscalls(&trace); - } else { - trace__set_bpf_map_filtered_pids(&trace); - trace__set_bpf_map_syscalls(&trace); - trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); - } - } - - err = bpf__setup_stdout(trace.evlist); - if (err) { - bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); - pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); - goto out; - } - err = -1; if (map_dump_str) { diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 38cae4721583..d3fc8090413c 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -18,7 +18,6 @@ #include #include "util/parse-events.h" #include -#include "util/bpf-loader.h" #include "util/debug.h" #include "util/event.h" #include "util/util.h" // usage() @@ -324,7 +323,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) perf_config__exit(); exit_browser(status); perf_env__exit(&perf_env); - bpf__clear(); if (status) return status & 0xff; diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore deleted file mode 100644 index d053b325f728..000000000000 --- a/tools/perf/tests/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -llvm-src-base.c -llvm-src-kbuild.c -llvm-src-prologue.c -llvm-src-relocation.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 52df5923a8b9..63d5e6d5f165 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -37,8 +37,6 @@ perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o -perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o -perf-y += bpf.o perf-y += topology.o perf-y += mem.o perf-y += cpumap.o @@ -69,34 +67,6 @@ perf-y += sigtrap.o perf-y += event_groups.o perf-y += symbols.o -$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build - $(call rule_mkdir) - $(Q)echo 
'#include ' > $@ - $(Q)echo 'const char test_llvm__bpf_base_prog[] =' >> $@ - $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ - $(Q)echo ';' >> $@ - -$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c tests/Build - $(call rule_mkdir) - $(Q)echo '#include ' > $@ - $(Q)echo 'const char test_llvm__bpf_test_kbuild_prog[] =' >> $@ - $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ - $(Q)echo ';' >> $@ - -$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build - $(call rule_mkdir) - $(Q)echo '#include ' > $@ - $(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@ - $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ - $(Q)echo ';' >> $@ - -$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build - $(call rule_mkdir) - $(Q)echo '#include ' > $@ - $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@ - $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ - $(Q)echo ';' >> $@ - ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c deleted file mode 100644 index b638cc99d5ae..000000000000 --- a/tools/perf/tests/bpf-script-example.c +++ /dev/null @@ -1,60 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-script-example.c - * Test basic LLVM building - */ -#ifndef LINUX_VERSION_CODE -# error Need LINUX_VERSION_CODE -# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' -#endif -#define BPF_ANY 0 -#define BPF_MAP_TYPE_ARRAY 2 -#define BPF_FUNC_map_lookup_elem 1 -#define BPF_FUNC_map_update_elem 2 - -static void *(*bpf_map_lookup_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_lookup_elem; -static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = - (void *) BPF_FUNC_map_update_elem; - -/* - * Following macros are taken from 
tools/lib/bpf/bpf_helpers.h, - * and are used to create BTF defined maps. It is easier to take - * 2 simple macros, than being able to include above header in - * runtime. - * - * __uint - defines integer attribute of BTF map definition, - * Such attributes are represented using a pointer to an array, - * in which dimensionality of array encodes specified integer - * value. - * - * __type - defines pointer variable with typeof(val) type for - * attributes like key or value, which will be defined by the - * size of the type. - */ -#define __uint(name, val) int (*name)[val] -#define __type(name, val) typeof(val) *name - -#define SEC(NAME) __attribute__((section(NAME), used)) -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, int); - __type(value, int); -} flip_table SEC(".maps"); - -SEC("syscalls:sys_enter_epoll_pwait") -int bpf_func__SyS_epoll_pwait(void *ctx) -{ - int ind =0; - int *flag = bpf_map_lookup_elem(&flip_table, &ind); - int new_flag; - if (!flag) - return 0; - /* flip flag and store back */ - new_flag = !*flag; - bpf_map_update_elem(&flip_table, &ind, &new_flag, BPF_ANY); - return new_flag; -} -char _license[] SEC("license") = "GPL"; -int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c deleted file mode 100644 index 219673aa278f..000000000000 --- a/tools/perf/tests/bpf-script-test-kbuild.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-script-test-kbuild.c - * Test include from kernel header - */ -#ifndef LINUX_VERSION_CODE -# error Need LINUX_VERSION_CODE -# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' -#endif -#define SEC(NAME) __attribute__((section(NAME), used)) - -#include - -SEC("func=vfs_llseek") -int bpf_func__vfs_llseek(void *ctx) -{ - return 0; -} - -char _license[] SEC("license") = "GPL"; -int _version 
SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c deleted file mode 100644 index 91778b5c6125..000000000000 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-script-test-prologue.c - * Test BPF prologue - */ -#ifndef LINUX_VERSION_CODE -# error Need LINUX_VERSION_CODE -# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' -#endif -#define SEC(NAME) __attribute__((section(NAME), used)) - -#include - -/* - * If CONFIG_PROFILE_ALL_BRANCHES is selected, - * 'if' is redefined after include kernel header. - * Recover 'if' for BPF object code. - */ -#ifdef if -# undef if -#endif - -typedef unsigned int __bitwise fmode_t; - -#define FMODE_READ 0x1 -#define FMODE_WRITE 0x2 - -static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = - (void *) 6; - -SEC("func=null_lseek file->f_mode offset orig") -int bpf_func__null_lseek(void *ctx, int err, unsigned long _f_mode, - unsigned long offset, unsigned long orig) -{ - fmode_t f_mode = (fmode_t)_f_mode; - - if (err) - return 0; - if (f_mode & FMODE_WRITE) - return 0; - if (offset & 1) - return 0; - if (orig == SEEK_CUR) - return 0; - return 1; -} - -char _license[] SEC("license") = "GPL"; -int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c deleted file mode 100644 index 74006e4b2d24..000000000000 --- a/tools/perf/tests/bpf-script-test-relocation.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-script-test-relocation.c - * Test BPF loader checking relocation - */ -#ifndef LINUX_VERSION_CODE -# error Need LINUX_VERSION_CODE -# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' -#endif -#define 
BPF_ANY 0 -#define BPF_MAP_TYPE_ARRAY 2 -#define BPF_FUNC_map_lookup_elem 1 -#define BPF_FUNC_map_update_elem 2 - -static void *(*bpf_map_lookup_elem)(void *map, void *key) = - (void *) BPF_FUNC_map_lookup_elem; -static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = - (void *) BPF_FUNC_map_update_elem; - -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; -}; - -#define SEC(NAME) __attribute__((section(NAME), used)) -struct bpf_map_def SEC("maps") my_table = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1, -}; - -int this_is_a_global_val; - -SEC("func=sys_write") -int bpf_func__sys_write(void *ctx) -{ - int key = 0; - int value = 0; - - /* - * Incorrect relocation. Should not allow this program be - * loaded into kernel. - */ - bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0); - return 0; -} -char _license[] SEC("license") = "GPL"; -int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c deleted file mode 100644 index 9ccecd873ecd..000000000000 --- a/tools/perf/tests/bpf.c +++ /dev/null @@ -1,390 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "tests.h" -#include "llvm.h" -#include "debug.h" -#include "parse-events.h" -#include "util/mmap.h" -#define NR_ITERS 111 -#define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test" - -#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) -#include -#include - -static int epoll_pwait_loop(void) -{ - struct epoll_event events; - int i; - - /* Should fail NR_ITERS times */ - for (i = 0; i < NR_ITERS; i++) - epoll_pwait(-(i + 1), &events, 0, 0, NULL); - return 0; -} - -#ifdef HAVE_BPF_PROLOGUE - -static int llseek_loop(void) -{ - int fds[2], 
i; - - fds[0] = open("/dev/null", O_RDONLY); - fds[1] = open("/dev/null", O_RDWR); - - if (fds[0] < 0 || fds[1] < 0) - return -1; - - for (i = 0; i < NR_ITERS; i++) { - lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET); - lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET); - } - close(fds[0]); - close(fds[1]); - return 0; -} - -#endif - -static struct { - enum test_llvm__testcase prog_id; - const char *name; - const char *msg_compile_fail; - const char *msg_load_fail; - int (*target_func)(void); - int expect_result; - bool pin; -} bpf_testcase_table[] = { - { - .prog_id = LLVM_TESTCASE_BASE, - .name = "[basic_bpf_test]", - .msg_compile_fail = "fix 'perf test LLVM' first", - .msg_load_fail = "load bpf object failed", - .target_func = &epoll_pwait_loop, - .expect_result = (NR_ITERS + 1) / 2, - }, - { - .prog_id = LLVM_TESTCASE_BASE, - .name = "[bpf_pinning]", - .msg_compile_fail = "fix kbuild first", - .msg_load_fail = "check your vmlinux setting?", - .target_func = &epoll_pwait_loop, - .expect_result = (NR_ITERS + 1) / 2, - .pin = true, - }, -#ifdef HAVE_BPF_PROLOGUE - { - .prog_id = LLVM_TESTCASE_BPF_PROLOGUE, - .name = "[bpf_prologue_test]", - .msg_compile_fail = "fix kbuild first", - .msg_load_fail = "check your vmlinux setting?", - .target_func = &llseek_loop, - .expect_result = (NR_ITERS + 1) / 4, - }, -#endif -}; - -static int do_test(struct bpf_object *obj, int (*func)(void), - int expect) -{ - struct record_opts opts = { - .target = { - .uid = UINT_MAX, - .uses_mmap = true, - }, - .freq = 0, - .mmap_pages = 256, - .default_interval = 1, - }; - - char pid[16]; - char sbuf[STRERR_BUFSIZE]; - struct evlist *evlist; - int i, ret = TEST_FAIL, err = 0, count = 0; - - struct parse_events_state parse_state; - struct parse_events_error parse_error; - - parse_events_error__init(&parse_error); - bzero(&parse_state, sizeof(parse_state)); - parse_state.error = &parse_error; - INIT_LIST_HEAD(&parse_state.list); - - err = 
parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL, NULL); - parse_events_error__exit(&parse_error); - if (err == -ENODATA) { - pr_debug("Failed to add events selected by BPF, debuginfo package not installed\n"); - return TEST_SKIP; - } - if (err || list_empty(&parse_state.list)) { - pr_debug("Failed to add events selected by BPF\n"); - return TEST_FAIL; - } - - snprintf(pid, sizeof(pid), "%d", getpid()); - pid[sizeof(pid) - 1] = '\0'; - opts.target.tid = opts.target.pid = pid; - - /* Instead of evlist__new_default, don't add default events */ - evlist = evlist__new(); - if (!evlist) { - pr_debug("Not enough memory to create evlist\n"); - return TEST_FAIL; - } - - err = evlist__create_maps(evlist, &opts.target); - if (err < 0) { - pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; - } - - evlist__splice_list_tail(evlist, &parse_state.list); - - evlist__config(evlist, &opts, NULL); - - err = evlist__open(evlist); - if (err < 0) { - pr_debug("perf_evlist__open: %s\n", - str_error_r(errno, sbuf, sizeof(sbuf))); - goto out_delete_evlist; - } - - err = evlist__mmap(evlist, opts.mmap_pages); - if (err < 0) { - pr_debug("evlist__mmap: %s\n", - str_error_r(errno, sbuf, sizeof(sbuf))); - goto out_delete_evlist; - } - - evlist__enable(evlist); - (*func)(); - evlist__disable(evlist); - - for (i = 0; i < evlist->core.nr_mmaps; i++) { - union perf_event *event; - struct mmap *md; - - md = &evlist->mmap[i]; - if (perf_mmap__read_init(&md->core) < 0) - continue; - - while ((event = perf_mmap__read_event(&md->core)) != NULL) { - const u32 type = event->header.type; - - if (type == PERF_RECORD_SAMPLE) - count ++; - } - perf_mmap__read_done(&md->core); - } - - if (count != expect * evlist->core.nr_entries) { - pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count); - goto out_delete_evlist; - } - - ret = TEST_OK; - -out_delete_evlist: - evlist__delete(evlist); - return ret; 
-} - -static struct bpf_object * -prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name) -{ - struct bpf_object *obj; - - obj = bpf__prepare_load_buffer(obj_buf, obj_buf_sz, name); - if (IS_ERR(obj)) { - pr_debug("Compile BPF program failed.\n"); - return NULL; - } - return obj; -} - -static int __test__bpf(int idx) -{ - int ret; - void *obj_buf; - size_t obj_buf_sz; - struct bpf_object *obj; - - ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, - bpf_testcase_table[idx].prog_id, - false, NULL); - if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { - pr_debug("Unable to get BPF object, %s\n", - bpf_testcase_table[idx].msg_compile_fail); - if ((idx == 0) || (ret == TEST_SKIP)) - return TEST_SKIP; - else - return TEST_FAIL; - } - - obj = prepare_bpf(obj_buf, obj_buf_sz, - bpf_testcase_table[idx].name); - if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) { - if (!obj) - pr_debug("Fail to load BPF object: %s\n", - bpf_testcase_table[idx].msg_load_fail); - else - pr_debug("Success unexpectedly: %s\n", - bpf_testcase_table[idx].msg_load_fail); - ret = TEST_FAIL; - goto out; - } - - if (obj) { - ret = do_test(obj, - bpf_testcase_table[idx].target_func, - bpf_testcase_table[idx].expect_result); - if (ret != TEST_OK) - goto out; - if (bpf_testcase_table[idx].pin) { - int err; - - if (!bpf_fs__mount()) { - pr_debug("BPF filesystem not mounted\n"); - ret = TEST_FAIL; - goto out; - } - err = mkdir(PERF_TEST_BPF_PATH, 0777); - if (err && errno != EEXIST) { - pr_debug("Failed to make perf_test dir: %s\n", - strerror(errno)); - ret = TEST_FAIL; - goto out; - } - if (bpf_object__pin(obj, PERF_TEST_BPF_PATH)) - ret = TEST_FAIL; - if (rm_rf(PERF_TEST_BPF_PATH)) - ret = TEST_FAIL; - } - } - -out: - free(obj_buf); - bpf__clear(); - return ret; -} - -static int check_env(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int err; - char license[] = "GPL"; - - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - err = 
fetch_kernel_version(&opts.kern_version, NULL, 0); - if (err) { - pr_debug("Unable to get kernel version\n"); - return err; - } - err = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, license, insns, - ARRAY_SIZE(insns), &opts); - if (err < 0) { - pr_err("Missing basic BPF support, skip this test: %s\n", - strerror(errno)); - return err; - } - close(err); - - return 0; -} - -static int test__bpf(int i) -{ - int err; - - if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table)) - return TEST_FAIL; - - if (geteuid() != 0) { - pr_debug("Only root can run BPF test\n"); - return TEST_SKIP; - } - - if (check_env()) - return TEST_SKIP; - - err = __test__bpf(i); - return err; -} -#endif - -static int test__basic_bpf_test(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) - return test__bpf(0); -#else - pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n"); - return TEST_SKIP; -#endif -} - -static int test__bpf_pinning(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) - return test__bpf(1); -#else - pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n"); - return TEST_SKIP; -#endif -} - -static int test__bpf_prologue_test(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE) && defined(HAVE_LIBTRACEEVENT) - return test__bpf(2); -#else - pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n"); - return TEST_SKIP; -#endif -} - - -static struct test_case bpf_tests[] = { -#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) - TEST_CASE("Basic BPF filtering", basic_bpf_test), - TEST_CASE_REASON("BPF pinning", bpf_pinning, - "clang isn't installed or environment missing BPF support"), -#ifdef HAVE_BPF_PROLOGUE - TEST_CASE_REASON("BPF prologue 
generation", bpf_prologue_test, - "clang/debuginfo isn't installed or environment missing BPF support"), -#else - TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), -#endif -#else - TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in or missing libtraceevent support"), - TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in or missing libtraceevent support"), - TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in or missing libtraceevent support"), -#endif - { .name = NULL, } -}; - -struct test_suite suite__bpf = { - .desc = "BPF filter", - .test_cases = bpf_tests, -}; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 0f3691fd31c2..0ad18cf6dd22 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -92,9 +92,7 @@ static struct test_suite *generic_tests[] = { &suite__fdarray__add, &suite__kmod_path__parse, &suite__thread_map, - &suite__llvm, &suite__session_topology, - &suite__bpf, &suite__thread_map_synthesize, &suite__thread_map_remove, &suite__cpu_map, diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c deleted file mode 100644 index 0bc25a56cfef..000000000000 --- a/tools/perf/tests/llvm.c +++ /dev/null @@ -1,219 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include "tests.h" -#include "debug.h" - -#ifdef HAVE_LIBBPF_SUPPORT -#include -#include -#include "llvm.h" -static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) -{ - struct bpf_object *obj; - - obj = bpf_object__open_mem(obj_buf, obj_buf_sz, NULL); - if (libbpf_get_error(obj)) - return TEST_FAIL; - bpf_object__close(obj); - return TEST_OK; -} - -static struct { - const char *source; - const char *desc; - bool should_load_fail; -} bpf_source_table[__LLVM_TESTCASE_MAX] = { - [LLVM_TESTCASE_BASE] = { - .source = test_llvm__bpf_base_prog, - .desc = "Basic BPF llvm compile", - }, - [LLVM_TESTCASE_KBUILD] = { - 
.source = test_llvm__bpf_test_kbuild_prog, - .desc = "kbuild searching", - }, - [LLVM_TESTCASE_BPF_PROLOGUE] = { - .source = test_llvm__bpf_test_prologue_prog, - .desc = "Compile source for BPF prologue generation", - }, - [LLVM_TESTCASE_BPF_RELOCATION] = { - .source = test_llvm__bpf_test_relocation, - .desc = "Compile source for BPF relocation", - .should_load_fail = true, - }, -}; - -int -test_llvm__fetch_bpf_obj(void **p_obj_buf, - size_t *p_obj_buf_sz, - enum test_llvm__testcase idx, - bool force, - bool *should_load_fail) -{ - const char *source; - const char *desc; - const char *tmpl_old, *clang_opt_old; - char *tmpl_new = NULL, *clang_opt_new = NULL; - int err, old_verbose, ret = TEST_FAIL; - - if (idx >= __LLVM_TESTCASE_MAX) - return TEST_FAIL; - - source = bpf_source_table[idx].source; - desc = bpf_source_table[idx].desc; - if (should_load_fail) - *should_load_fail = bpf_source_table[idx].should_load_fail; - - /* - * Skip this test if user's .perfconfig doesn't set [llvm] section - * and clang is not found in $PATH - */ - if (!force && (!llvm_param.user_set_param && - llvm__search_clang())) { - pr_debug("No clang, skip this test\n"); - return TEST_SKIP; - } - - /* - * llvm is verbosity when error. Suppress all error output if - * not 'perf test -v'. - */ - old_verbose = verbose; - if (verbose == 0) - verbose = -1; - - *p_obj_buf = NULL; - *p_obj_buf_sz = 0; - - if (!llvm_param.clang_bpf_cmd_template) - goto out; - - if (!llvm_param.clang_opt) - llvm_param.clang_opt = strdup(""); - - err = asprintf(&tmpl_new, "echo '%s' | %s%s", source, - llvm_param.clang_bpf_cmd_template, - old_verbose ? 
"" : " 2>/dev/null"); - if (err < 0) - goto out; - err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt); - if (err < 0) - goto out; - - tmpl_old = llvm_param.clang_bpf_cmd_template; - llvm_param.clang_bpf_cmd_template = tmpl_new; - clang_opt_old = llvm_param.clang_opt; - llvm_param.clang_opt = clang_opt_new; - - err = llvm__compile_bpf("-", p_obj_buf, p_obj_buf_sz); - - llvm_param.clang_bpf_cmd_template = tmpl_old; - llvm_param.clang_opt = clang_opt_old; - - verbose = old_verbose; - if (err) - goto out; - - ret = TEST_OK; -out: - free(tmpl_new); - free(clang_opt_new); - if (ret != TEST_OK) - pr_debug("Failed to compile test case: '%s'\n", desc); - return ret; -} - -static int test__llvm(int subtest) -{ - int ret; - void *obj_buf = NULL; - size_t obj_buf_sz = 0; - bool should_load_fail = false; - - if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) - return TEST_FAIL; - - ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, - subtest, false, &should_load_fail); - - if (ret == TEST_OK && !should_load_fail) { - ret = test__bpf_parsing(obj_buf, obj_buf_sz); - if (ret != TEST_OK) { - pr_debug("Failed to parse test case '%s'\n", - bpf_source_table[subtest].desc); - } - } - free(obj_buf); - - return ret; -} -#endif //HAVE_LIBBPF_SUPPORT - -static int test__llvm__bpf_base_prog(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#ifdef HAVE_LIBBPF_SUPPORT - return test__llvm(LLVM_TESTCASE_BASE); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif -} - -static int test__llvm__bpf_test_kbuild_prog(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#ifdef HAVE_LIBBPF_SUPPORT - return test__llvm(LLVM_TESTCASE_KBUILD); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif -} - -static int test__llvm__bpf_test_prologue_prog(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#ifdef HAVE_LIBBPF_SUPPORT 
- return test__llvm(LLVM_TESTCASE_BPF_PROLOGUE); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif -} - -static int test__llvm__bpf_test_relocation(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ -#ifdef HAVE_LIBBPF_SUPPORT - return test__llvm(LLVM_TESTCASE_BPF_RELOCATION); -#else - pr_debug("Skip LLVM test because BPF support is not compiled\n"); - return TEST_SKIP; -#endif -} - - -static struct test_case llvm_tests[] = { -#ifdef HAVE_LIBBPF_SUPPORT - TEST_CASE("Basic BPF llvm compile", llvm__bpf_base_prog), - TEST_CASE("kbuild searching", llvm__bpf_test_kbuild_prog), - TEST_CASE("Compile source for BPF prologue generation", - llvm__bpf_test_prologue_prog), - TEST_CASE("Compile source for BPF relocation", llvm__bpf_test_relocation), -#else - TEST_CASE_REASON("Basic BPF llvm compile", llvm__bpf_base_prog, "not compiled in"), - TEST_CASE_REASON("kbuild searching", llvm__bpf_test_kbuild_prog, "not compiled in"), - TEST_CASE_REASON("Compile source for BPF prologue generation", - llvm__bpf_test_prologue_prog, "not compiled in"), - TEST_CASE_REASON("Compile source for BPF relocation", - llvm__bpf_test_relocation, "not compiled in"), -#endif - { .name = NULL, } -}; - -struct test_suite suite__llvm = { - .desc = "LLVM search and compile", - .test_cases = llvm_tests, -}; diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h deleted file mode 100644 index f68b0d9b8ae2..000000000000 --- a/tools/perf/tests/llvm.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PERF_TEST_LLVM_H -#define PERF_TEST_LLVM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include /* for size_t */ -#include /* for bool */ - -extern const char test_llvm__bpf_base_prog[]; -extern const char test_llvm__bpf_test_kbuild_prog[]; -extern const char test_llvm__bpf_test_prologue_prog[]; -extern const char test_llvm__bpf_test_relocation[]; - -enum test_llvm__testcase { - LLVM_TESTCASE_BASE, - 
LLVM_TESTCASE_KBUILD, - LLVM_TESTCASE_BPF_PROLOGUE, - LLVM_TESTCASE_BPF_RELOCATION, - __LLVM_TESTCASE_MAX, -}; - -int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, - enum test_llvm__testcase index, bool force, - bool *should_load_fail); -#ifdef __cplusplus -} -#endif -#endif diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index f424c0b7f43f..f33cfc3c19a4 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -113,7 +113,6 @@ DECLARE_SUITE(fdarray__filter); DECLARE_SUITE(fdarray__add); DECLARE_SUITE(kmod_path__parse); DECLARE_SUITE(thread_map); -DECLARE_SUITE(llvm); DECLARE_SUITE(bpf); DECLARE_SUITE(session_topology); DECLARE_SUITE(thread_map_synthesize); @@ -129,7 +128,6 @@ DECLARE_SUITE(sdt_event); DECLARE_SUITE(is_printable_array); DECLARE_SUITE(bitmap_print); DECLARE_SUITE(perf_hooks); -DECLARE_SUITE(clang); DECLARE_SUITE(unit_number__scnprint); DECLARE_SUITE(mem2node); DECLARE_SUITE(maps__merge_in); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ff3b55c7ed43..c6650d3fb066 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -23,7 +23,6 @@ perf-y += evswitch.o perf-y += find_bit.o perf-y += get_current_dir_name.o perf-y += levenshtein.o -perf-y += llvm-utils.o perf-y += mmap.o perf-y += memswap.o perf-y += parse-events.o @@ -150,7 +149,6 @@ perf-y += list_sort.o perf-y += mutex.o perf-y += sharded_mutex.o -perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o @@ -168,7 +166,6 @@ ifeq ($(CONFIG_LIBTRACEEVENT),y) perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o endif -perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-event.o @@ -235,7 +232,6 @@ perf-$(CONFIG_LIBBPF) += bpf-utils.o perf-$(CONFIG_LIBPFM4) += pfm.o CFLAGS_config.o += 
-DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed @@ -327,7 +323,7 @@ ifeq ($(BISON_LT_381),1) bison_flags += -DYYNOMEM=YYABORT endif -CFLAGS_parse-events-flex.o += $(flex_flags) +CFLAGS_parse-events-flex.o += $(flex_flags) -Wno-unused-label CFLAGS_pmu-flex.o += $(flex_flags) CFLAGS_expr-flex.o += $(flex_flags) CFLAGS_bpf-filter-flex.o += $(flex_flags) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c deleted file mode 100644 index b54e42f17926..000000000000 --- a/tools/perf/util/bpf-loader.c +++ /dev/null @@ -1,1999 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-loader.c - * - * Copyright (C) 2015 Wang Nan - * Copyright (C) 2015 Huawei Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "debug.h" -#include "evlist.h" -#include "bpf-loader.h" -#include "bpf-prologue.h" -#include "probe-event.h" -#include "probe-finder.h" // for MAX_PROBES -#include "parse-events.h" -#include "strfilter.h" -#include "util.h" -#include "llvm-utils.h" -#include "util/hashmap.h" -#include "asm/bug.h" - -#include - -static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), - const char *fmt, va_list args) -{ - return veprintf(1, verbose, pr_fmt(fmt), args); -} - -struct bpf_prog_priv { - bool is_tp; - char *sys_name; - char *evt_name; - struct perf_probe_event pev; - bool need_prologue; - struct bpf_insn *insns_buf; - int nr_types; - int *type_mapping; - int *prologue_fds; -}; - -struct bpf_perf_object { - struct list_head list; - struct bpf_object *obj; -}; - -struct bpf_preproc_result { - struct bpf_insn *new_insn_ptr; - int new_insn_cnt; -}; - -static LIST_HEAD(bpf_objects_list); -static struct hashmap *bpf_program_hash; -static struct hashmap *bpf_map_hash; - -static struct bpf_perf_object * 
-bpf_perf_object__next(struct bpf_perf_object *prev) -{ - if (!prev) { - if (list_empty(&bpf_objects_list)) - return NULL; - - return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list); - } - if (list_is_last(&prev->list, &bpf_objects_list)) - return NULL; - - return list_next_entry(prev, list); -} - -#define bpf_perf_object__for_each(perf_obj, tmp) \ - for ((perf_obj) = bpf_perf_object__next(NULL), \ - (tmp) = bpf_perf_object__next(perf_obj); \ - (perf_obj) != NULL; \ - (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp)) - -static bool libbpf_initialized; -static int libbpf_sec_handler; - -static int bpf_perf_object__add(struct bpf_object *obj) -{ - struct bpf_perf_object *perf_obj = zalloc(sizeof(*perf_obj)); - - if (perf_obj) { - INIT_LIST_HEAD(&perf_obj->list); - perf_obj->obj = obj; - list_add_tail(&perf_obj->list, &bpf_objects_list); - } - return perf_obj ? 0 : -ENOMEM; -} - -static void *program_priv(const struct bpf_program *prog) -{ - void *priv; - - if (IS_ERR_OR_NULL(bpf_program_hash)) - return NULL; - if (!hashmap__find(bpf_program_hash, prog, &priv)) - return NULL; - return priv; -} - -static struct bpf_insn prologue_init_insn[] = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), -}; - -static int libbpf_prog_prepare_load_fn(struct bpf_program *prog, - struct bpf_prog_load_opts *opts __maybe_unused, - long cookie __maybe_unused) -{ - size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn); - size_t orig_insn_cnt, insn_cnt, init_size, orig_size; - struct bpf_prog_priv *priv = program_priv(prog); - const struct bpf_insn *orig_insn; - struct bpf_insn *insn; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("bpf: failed to get private field\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (!priv->need_prologue) - return 0; - - /* prepend initialization code to program instructions */ - orig_insn = bpf_program__insns(prog); - orig_insn_cnt = 
bpf_program__insn_cnt(prog); - init_size = init_size_cnt * sizeof(*insn); - orig_size = orig_insn_cnt * sizeof(*insn); - - insn_cnt = orig_insn_cnt + init_size_cnt; - insn = malloc(insn_cnt * sizeof(*insn)); - if (!insn) - return -ENOMEM; - - memcpy(insn, prologue_init_insn, init_size); - memcpy((char *) insn + init_size, orig_insn, orig_size); - bpf_program__set_insns(prog, insn, insn_cnt); - return 0; -} - -static int libbpf_init(void) -{ - LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, - .prog_prepare_load_fn = libbpf_prog_prepare_load_fn, - ); - - if (libbpf_initialized) - return 0; - - libbpf_set_print(libbpf_perf_print); - libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE, - 0, &handler_opts); - if (libbpf_sec_handler < 0) { - pr_debug("bpf: failed to register libbpf section handler: %d\n", - libbpf_sec_handler); - return -BPF_LOADER_ERRNO__INTERNAL; - } - libbpf_initialized = true; - return 0; -} - -struct bpf_object * -bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name); - struct bpf_object *obj; - int err; - - err = libbpf_init(); - if (err) - return ERR_PTR(err); - - obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); - if (IS_ERR_OR_NULL(obj)) { - pr_debug("bpf: failed to load buffer\n"); - return ERR_PTR(-EINVAL); - } - - if (bpf_perf_object__add(obj)) { - bpf_object__close(obj); - return ERR_PTR(-ENOMEM); - } - - return obj; -} - -static void bpf_perf_object__close(struct bpf_perf_object *perf_obj) -{ - list_del(&perf_obj->list); - bpf_object__close(perf_obj->obj); - free(perf_obj); -} - -struct bpf_object *bpf__prepare_load(const char *filename, bool source) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename); - struct bpf_object *obj; - int err; - - err = libbpf_init(); - if (err) - return ERR_PTR(err); - - if (source) { - void *obj_buf; - size_t obj_buf_sz; - - err = llvm__compile_bpf(filename, &obj_buf, 
&obj_buf_sz); - if (err) - return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); - - obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); - - if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) - llvm__dump_obj(filename, obj_buf, obj_buf_sz); - - free(obj_buf); - } else { - obj = bpf_object__open(filename); - } - - if (IS_ERR_OR_NULL(obj)) { - pr_debug("bpf: failed to load %s\n", filename); - return obj; - } - - if (bpf_perf_object__add(obj)) { - bpf_object__close(obj); - return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); - } - - return obj; -} - -static void close_prologue_programs(struct bpf_prog_priv *priv) -{ - struct perf_probe_event *pev; - int i, fd; - - if (!priv->need_prologue) - return; - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - fd = priv->prologue_fds[i]; - if (fd != -1) - close(fd); - } -} - -static void -clear_prog_priv(const struct bpf_program *prog __maybe_unused, - void *_priv) -{ - struct bpf_prog_priv *priv = _priv; - - close_prologue_programs(priv); - cleanup_perf_probe_events(&priv->pev, 1); - zfree(&priv->insns_buf); - zfree(&priv->prologue_fds); - zfree(&priv->type_mapping); - zfree(&priv->sys_name); - zfree(&priv->evt_name); - free(priv); -} - -static void bpf_program_hash_free(void) -{ - struct hashmap_entry *cur; - size_t bkt; - - if (IS_ERR_OR_NULL(bpf_program_hash)) - return; - - hashmap__for_each_entry(bpf_program_hash, cur, bkt) - clear_prog_priv(cur->pkey, cur->pvalue); - - hashmap__free(bpf_program_hash); - bpf_program_hash = NULL; -} - -static void bpf_map_hash_free(void); - -void bpf__clear(void) -{ - struct bpf_perf_object *perf_obj, *tmp; - - bpf_perf_object__for_each(perf_obj, tmp) { - bpf__unprobe(perf_obj->obj); - bpf_perf_object__close(perf_obj); - } - - bpf_program_hash_free(); - bpf_map_hash_free(); -} - -static size_t ptr_hash(const long __key, void *ctx __maybe_unused) -{ - return __key; -} - -static bool ptr_equal(long key1, long key2, void *ctx __maybe_unused) -{ - return key1 == key2; -} - -static int 
program_set_priv(struct bpf_program *prog, void *priv) -{ - void *old_priv; - - /* - * Should not happen, we warn about it in the - * caller function - config_bpf_program - */ - if (IS_ERR(bpf_program_hash)) - return PTR_ERR(bpf_program_hash); - - if (!bpf_program_hash) { - bpf_program_hash = hashmap__new(ptr_hash, ptr_equal, NULL); - if (IS_ERR(bpf_program_hash)) - return PTR_ERR(bpf_program_hash); - } - - old_priv = program_priv(prog); - if (old_priv) { - clear_prog_priv(prog, old_priv); - return hashmap__set(bpf_program_hash, prog, priv, NULL, NULL); - } - return hashmap__add(bpf_program_hash, prog, priv); -} - -static int -prog_config__exec(const char *value, struct perf_probe_event *pev) -{ - pev->uprobes = true; - pev->target = strdup(value); - if (!pev->target) - return -ENOMEM; - return 0; -} - -static int -prog_config__module(const char *value, struct perf_probe_event *pev) -{ - pev->uprobes = false; - pev->target = strdup(value); - if (!pev->target) - return -ENOMEM; - return 0; -} - -static int -prog_config__bool(const char *value, bool *pbool, bool invert) -{ - int err; - bool bool_value; - - if (!pbool) - return -EINVAL; - - err = strtobool(value, &bool_value); - if (err) - return err; - - *pbool = invert ? 
!bool_value : bool_value; - return 0; -} - -static int -prog_config__inlines(const char *value, - struct perf_probe_event *pev __maybe_unused) -{ - return prog_config__bool(value, &probe_conf.no_inlines, true); -} - -static int -prog_config__force(const char *value, - struct perf_probe_event *pev __maybe_unused) -{ - return prog_config__bool(value, &probe_conf.force_add, false); -} - -static struct { - const char *key; - const char *usage; - const char *desc; - int (*func)(const char *, struct perf_probe_event *); -} bpf_prog_config_terms[] = { - { - .key = "exec", - .usage = "exec=", - .desc = "Set uprobe target", - .func = prog_config__exec, - }, - { - .key = "module", - .usage = "module= ", - .desc = "Set kprobe module", - .func = prog_config__module, - }, - { - .key = "inlines", - .usage = "inlines=[yes|no] ", - .desc = "Probe at inline symbol", - .func = prog_config__inlines, - }, - { - .key = "force", - .usage = "force=[yes|no] ", - .desc = "Forcibly add events with existing name", - .func = prog_config__force, - }, -}; - -static int -do_prog_config(const char *key, const char *value, - struct perf_probe_event *pev) -{ - unsigned int i; - - pr_debug("config bpf program: %s=%s\n", key, value); - for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++) - if (strcmp(key, bpf_prog_config_terms[i].key) == 0) - return bpf_prog_config_terms[i].func(value, pev); - - pr_debug("BPF: ERROR: invalid program config option: %s=%s\n", - key, value); - - pr_debug("\nHint: Valid options are:\n"); - for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++) - pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage, - bpf_prog_config_terms[i].desc); - pr_debug("\n"); - - return -BPF_LOADER_ERRNO__PROGCONF_TERM; -} - -static const char * -parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev) -{ - char *text = strdup(config_str); - char *sep, *line; - const char *main_str = NULL; - int err = 0; - - if (!text) { - pr_debug("Not enough memory: dup config_str 
failed\n"); - return ERR_PTR(-ENOMEM); - } - - line = text; - while ((sep = strchr(line, ';'))) { - char *equ; - - *sep = '\0'; - equ = strchr(line, '='); - if (!equ) { - pr_warning("WARNING: invalid config in BPF object: %s\n", - line); - pr_warning("\tShould be 'key=value'.\n"); - goto nextline; - } - *equ = '\0'; - - err = do_prog_config(line, equ + 1, pev); - if (err) - break; -nextline: - line = sep + 1; - } - - if (!err) - main_str = config_str + (line - text); - free(text); - - return err ? ERR_PTR(err) : main_str; -} - -static int -parse_prog_config(const char *config_str, const char **p_main_str, - bool *is_tp, struct perf_probe_event *pev) -{ - int err; - const char *main_str = parse_prog_config_kvpair(config_str, pev); - - if (IS_ERR(main_str)) - return PTR_ERR(main_str); - - *p_main_str = main_str; - if (!strchr(main_str, '=')) { - /* Is a tracepoint event? */ - const char *s = strchr(main_str, ':'); - - if (!s) { - pr_debug("bpf: '%s' is not a valid tracepoint\n", - config_str); - return -BPF_LOADER_ERRNO__CONFIG; - } - - *is_tp = true; - return 0; - } - - *is_tp = false; - err = parse_perf_probe_command(main_str, pev); - if (err < 0) { - pr_debug("bpf: '%s' is not a valid config string\n", - config_str); - /* parse failed, don't need clear pev. 
*/ - return -BPF_LOADER_ERRNO__CONFIG; - } - return 0; -} - -static int -config_bpf_program(struct bpf_program *prog) -{ - struct perf_probe_event *pev = NULL; - struct bpf_prog_priv *priv = NULL; - const char *config_str, *main_str; - bool is_tp = false; - int err; - - /* Initialize per-program probing setting */ - probe_conf.no_inlines = false; - probe_conf.force_add = false; - - priv = calloc(sizeof(*priv), 1); - if (!priv) { - pr_debug("bpf: failed to alloc priv\n"); - return -ENOMEM; - } - pev = &priv->pev; - - config_str = bpf_program__section_name(prog); - pr_debug("bpf: config program '%s'\n", config_str); - err = parse_prog_config(config_str, &main_str, &is_tp, pev); - if (err) - goto errout; - - if (is_tp) { - char *s = strchr(main_str, ':'); - - priv->is_tp = true; - priv->sys_name = strndup(main_str, s - main_str); - priv->evt_name = strdup(s + 1); - goto set_priv; - } - - if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) { - pr_debug("bpf: '%s': group for event is set and not '%s'.\n", - config_str, PERF_BPF_PROBE_GROUP); - err = -BPF_LOADER_ERRNO__GROUP; - goto errout; - } else if (!pev->group) - pev->group = strdup(PERF_BPF_PROBE_GROUP); - - if (!pev->group) { - pr_debug("bpf: strdup failed\n"); - err = -ENOMEM; - goto errout; - } - - if (!pev->event) { - pr_debug("bpf: '%s': event name is missing. 
Section name should be 'key=value'\n", - config_str); - err = -BPF_LOADER_ERRNO__EVENTNAME; - goto errout; - } - pr_debug("bpf: config '%s' is ok\n", config_str); - -set_priv: - err = program_set_priv(prog, priv); - if (err) { - pr_debug("Failed to set priv for program '%s'\n", config_str); - goto errout; - } - - return 0; - -errout: - if (pev) - clear_perf_probe_event(pev); - free(priv); - return err; -} - -static int bpf__prepare_probe(void) -{ - static int err = 0; - static bool initialized = false; - - /* - * Make err static, so if init failed the first, bpf__prepare_probe() - * fails each time without calling init_probe_symbol_maps multiple - * times. - */ - if (initialized) - return err; - - initialized = true; - err = init_probe_symbol_maps(false); - if (err < 0) - pr_debug("Failed to init_probe_symbol_maps\n"); - probe_conf.max_probes = MAX_PROBES; - return err; -} - -static int -preproc_gen_prologue(struct bpf_program *prog, int n, - const struct bpf_insn *orig_insns, int orig_insns_cnt, - struct bpf_preproc_result *res) -{ - struct bpf_prog_priv *priv = program_priv(prog); - struct probe_trace_event *tev; - struct perf_probe_event *pev; - struct bpf_insn *buf; - size_t prologue_cnt = 0; - int i, err; - - if (IS_ERR_OR_NULL(priv) || priv->is_tp) - goto errout; - - pev = &priv->pev; - - if (n < 0 || n >= priv->nr_types) - goto errout; - - /* Find a tev belongs to that type */ - for (i = 0; i < pev->ntevs; i++) { - if (priv->type_mapping[i] == n) - break; - } - - if (i >= pev->ntevs) { - pr_debug("Internal error: prologue type %d not found\n", n); - return -BPF_LOADER_ERRNO__PROLOGUE; - } - - tev = &pev->tevs[i]; - - buf = priv->insns_buf; - err = bpf__gen_prologue(tev->args, tev->nargs, - buf, &prologue_cnt, - BPF_MAXINSNS - orig_insns_cnt); - if (err) { - const char *title; - - title = bpf_program__section_name(prog); - pr_debug("Failed to generate prologue for program %s\n", - title); - return err; - } - - memcpy(&buf[prologue_cnt], orig_insns, - 
sizeof(struct bpf_insn) * orig_insns_cnt); - - res->new_insn_ptr = buf; - res->new_insn_cnt = prologue_cnt + orig_insns_cnt; - return 0; - -errout: - pr_debug("Internal error in preproc_gen_prologue\n"); - return -BPF_LOADER_ERRNO__PROLOGUE; -} - -/* - * compare_tev_args is reflexive, transitive and antisymmetric. - * I can proof it but this margin is too narrow to contain. - */ -static int compare_tev_args(const void *ptev1, const void *ptev2) -{ - int i, ret; - const struct probe_trace_event *tev1 = - *(const struct probe_trace_event **)ptev1; - const struct probe_trace_event *tev2 = - *(const struct probe_trace_event **)ptev2; - - ret = tev2->nargs - tev1->nargs; - if (ret) - return ret; - - for (i = 0; i < tev1->nargs; i++) { - struct probe_trace_arg *arg1, *arg2; - struct probe_trace_arg_ref *ref1, *ref2; - - arg1 = &tev1->args[i]; - arg2 = &tev2->args[i]; - - ret = strcmp(arg1->value, arg2->value); - if (ret) - return ret; - - ref1 = arg1->ref; - ref2 = arg2->ref; - - while (ref1 && ref2) { - ret = ref2->offset - ref1->offset; - if (ret) - return ret; - - ref1 = ref1->next; - ref2 = ref2->next; - } - - if (ref1 || ref2) - return ref2 ? 1 : -1; - } - - return 0; -} - -/* - * Assign a type number to each tevs in a pev. - * mapping is an array with same slots as tevs in that pev. - * nr_types will be set to number of types. 
- */ -static int map_prologue(struct perf_probe_event *pev, int *mapping, - int *nr_types) -{ - int i, type = 0; - struct probe_trace_event **ptevs; - - size_t array_sz = sizeof(*ptevs) * pev->ntevs; - - ptevs = malloc(array_sz); - if (!ptevs) { - pr_debug("Not enough memory: alloc ptevs failed\n"); - return -ENOMEM; - } - - pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs); - for (i = 0; i < pev->ntevs; i++) - ptevs[i] = &pev->tevs[i]; - - qsort(ptevs, pev->ntevs, sizeof(*ptevs), - compare_tev_args); - - for (i = 0; i < pev->ntevs; i++) { - int n; - - n = ptevs[i] - pev->tevs; - if (i == 0) { - mapping[n] = type; - pr_debug("mapping[%d]=%d\n", n, type); - continue; - } - - if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0) - mapping[n] = type; - else - mapping[n] = ++type; - - pr_debug("mapping[%d]=%d\n", n, mapping[n]); - } - free(ptevs); - *nr_types = type + 1; - - return 0; -} - -static int hook_load_preprocessor(struct bpf_program *prog) -{ - struct bpf_prog_priv *priv = program_priv(prog); - struct perf_probe_event *pev; - bool need_prologue = false; - int i; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("Internal error when hook preprocessor\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (priv->is_tp) { - priv->need_prologue = false; - return 0; - } - - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - struct probe_trace_event *tev = &pev->tevs[i]; - - if (tev->nargs > 0) { - need_prologue = true; - break; - } - } - - /* - * Since all tevs don't have argument, we don't need generate - * prologue. 
- */ - if (!need_prologue) { - priv->need_prologue = false; - return 0; - } - - priv->need_prologue = true; - priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS); - if (!priv->insns_buf) { - pr_debug("Not enough memory: alloc insns_buf failed\n"); - return -ENOMEM; - } - - priv->prologue_fds = malloc(sizeof(int) * pev->ntevs); - if (!priv->prologue_fds) { - pr_debug("Not enough memory: alloc prologue fds failed\n"); - return -ENOMEM; - } - memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs); - - priv->type_mapping = malloc(sizeof(int) * pev->ntevs); - if (!priv->type_mapping) { - pr_debug("Not enough memory: alloc type_mapping failed\n"); - return -ENOMEM; - } - memset(priv->type_mapping, -1, - sizeof(int) * pev->ntevs); - - return map_prologue(pev, priv->type_mapping, &priv->nr_types); -} - -int bpf__probe(struct bpf_object *obj) -{ - int err = 0; - struct bpf_program *prog; - struct bpf_prog_priv *priv; - struct perf_probe_event *pev; - - err = bpf__prepare_probe(); - if (err) { - pr_debug("bpf__prepare_probe failed\n"); - return err; - } - - bpf_object__for_each_program(prog, obj) { - err = config_bpf_program(prog); - if (err) - goto out; - - priv = program_priv(prog); - if (IS_ERR_OR_NULL(priv)) { - if (!priv) - err = -BPF_LOADER_ERRNO__INTERNAL; - else - err = PTR_ERR(priv); - goto out; - } - - if (priv->is_tp) { - bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); - continue; - } - - bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE); - pev = &priv->pev; - - err = convert_perf_probe_events(pev, 1); - if (err < 0) { - pr_debug("bpf_probe: failed to convert perf probe events\n"); - goto out; - } - - err = apply_perf_probe_events(pev, 1); - if (err < 0) { - pr_debug("bpf_probe: failed to apply perf probe events\n"); - goto out; - } - - /* - * After probing, let's consider prologue, which - * adds program fetcher to BPF programs. 
- * - * hook_load_preprocessor() hooks pre-processor - * to bpf_program, let it generate prologue - * dynamically during loading. - */ - err = hook_load_preprocessor(prog); - if (err) - goto out; - } -out: - return err < 0 ? err : 0; -} - -#define EVENTS_WRITE_BUFSIZE 4096 -int bpf__unprobe(struct bpf_object *obj) -{ - int err, ret = 0; - struct bpf_program *prog; - - bpf_object__for_each_program(prog, obj) { - struct bpf_prog_priv *priv = program_priv(prog); - int i; - - if (IS_ERR_OR_NULL(priv) || priv->is_tp) - continue; - - for (i = 0; i < priv->pev.ntevs; i++) { - struct probe_trace_event *tev = &priv->pev.tevs[i]; - char name_buf[EVENTS_WRITE_BUFSIZE]; - struct strfilter *delfilter; - - snprintf(name_buf, EVENTS_WRITE_BUFSIZE, - "%s:%s", tev->group, tev->event); - name_buf[EVENTS_WRITE_BUFSIZE - 1] = '\0'; - - delfilter = strfilter__new(name_buf, NULL); - if (!delfilter) { - pr_debug("Failed to create filter for unprobing\n"); - ret = -ENOMEM; - continue; - } - - err = del_perf_probe_events(delfilter); - strfilter__delete(delfilter); - if (err) { - pr_debug("Failed to delete %s\n", name_buf); - ret = err; - continue; - } - } - } - return ret; -} - -static int bpf_object__load_prologue(struct bpf_object *obj) -{ - int init_cnt = ARRAY_SIZE(prologue_init_insn); - const struct bpf_insn *orig_insns; - struct bpf_preproc_result res; - struct perf_probe_event *pev; - struct bpf_program *prog; - int orig_insns_cnt; - - bpf_object__for_each_program(prog, obj) { - struct bpf_prog_priv *priv = program_priv(prog); - int err, i, fd; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("bpf: failed to get private field\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (!priv->need_prologue) - continue; - - /* - * For each program that needs prologue we do following: - * - * - take its current instructions and use them - * to generate the new code with prologue - * - load new instructions with bpf_prog_load - * and keep the fd in prologue_fds - * - new fd will be used in 
bpf__foreach_event - * to connect this program with perf evsel - */ - orig_insns = bpf_program__insns(prog); - orig_insns_cnt = bpf_program__insn_cnt(prog); - - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - /* - * Skipping artificall prologue_init_insn instructions - * (init_cnt), so the prologue can be generated instead - * of them. - */ - err = preproc_gen_prologue(prog, i, - orig_insns + init_cnt, - orig_insns_cnt - init_cnt, - &res); - if (err) - return err; - - fd = bpf_prog_load(bpf_program__get_type(prog), - bpf_program__name(prog), "GPL", - res.new_insn_ptr, - res.new_insn_cnt, NULL); - if (fd < 0) { - char bf[128]; - - libbpf_strerror(-errno, bf, sizeof(bf)); - pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n", - -errno, bf); - return -errno; - } - priv->prologue_fds[i] = fd; - } - /* - * We no longer need the original program, - * we can unload it. - */ - bpf_program__unload(prog); - } - return 0; -} - -int bpf__load(struct bpf_object *obj) -{ - int err; - - err = bpf_object__load(obj); - if (err) { - char bf[128]; - libbpf_strerror(err, bf, sizeof(bf)); - pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf); - return err; - } - return bpf_object__load_prologue(obj); -} - -int bpf__foreach_event(struct bpf_object *obj, - bpf_prog_iter_callback_t func, - void *arg) -{ - struct bpf_program *prog; - int err; - - bpf_object__for_each_program(prog, obj) { - struct bpf_prog_priv *priv = program_priv(prog); - struct probe_trace_event *tev; - struct perf_probe_event *pev; - int i, fd; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("bpf: failed to get private field\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (priv->is_tp) { - fd = bpf_program__fd(prog); - err = (*func)(priv->sys_name, priv->evt_name, fd, obj, arg); - if (err) { - pr_debug("bpf: tracepoint call back failed, stop iterate\n"); - return err; - } - continue; - } - - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - tev = &pev->tevs[i]; - - if 
(priv->need_prologue) - fd = priv->prologue_fds[i]; - else - fd = bpf_program__fd(prog); - - if (fd < 0) { - pr_debug("bpf: failed to get file descriptor\n"); - return fd; - } - - err = (*func)(tev->group, tev->event, fd, obj, arg); - if (err) { - pr_debug("bpf: call back failed, stop iterate\n"); - return err; - } - } - } - return 0; -} - -enum bpf_map_op_type { - BPF_MAP_OP_SET_VALUE, - BPF_MAP_OP_SET_EVSEL, -}; - -enum bpf_map_key_type { - BPF_MAP_KEY_ALL, -}; - -struct bpf_map_op { - struct list_head list; - enum bpf_map_op_type op_type; - enum bpf_map_key_type key_type; - union { - u64 value; - struct evsel *evsel; - } v; -}; - -struct bpf_map_priv { - struct list_head ops_list; -}; - -static void -bpf_map_op__delete(struct bpf_map_op *op) -{ - if (!list_empty(&op->list)) - list_del_init(&op->list); - free(op); -} - -static void -bpf_map_priv__purge(struct bpf_map_priv *priv) -{ - struct bpf_map_op *pos, *n; - - list_for_each_entry_safe(pos, n, &priv->ops_list, list) { - list_del_init(&pos->list); - bpf_map_op__delete(pos); - } -} - -static void -bpf_map_priv__clear(const struct bpf_map *map __maybe_unused, - void *_priv) -{ - struct bpf_map_priv *priv = _priv; - - bpf_map_priv__purge(priv); - free(priv); -} - -static void *map_priv(const struct bpf_map *map) -{ - void *priv; - - if (IS_ERR_OR_NULL(bpf_map_hash)) - return NULL; - if (!hashmap__find(bpf_map_hash, map, &priv)) - return NULL; - return priv; -} - -static void bpf_map_hash_free(void) -{ - struct hashmap_entry *cur; - size_t bkt; - - if (IS_ERR_OR_NULL(bpf_map_hash)) - return; - - hashmap__for_each_entry(bpf_map_hash, cur, bkt) - bpf_map_priv__clear(cur->pkey, cur->pvalue); - - hashmap__free(bpf_map_hash); - bpf_map_hash = NULL; -} - -static int map_set_priv(struct bpf_map *map, void *priv) -{ - void *old_priv; - - if (WARN_ON_ONCE(IS_ERR(bpf_map_hash))) - return PTR_ERR(bpf_program_hash); - - if (!bpf_map_hash) { - bpf_map_hash = hashmap__new(ptr_hash, ptr_equal, NULL); - if (IS_ERR(bpf_map_hash)) 
- return PTR_ERR(bpf_map_hash); - } - - old_priv = map_priv(map); - if (old_priv) { - bpf_map_priv__clear(map, old_priv); - return hashmap__set(bpf_map_hash, map, priv, NULL, NULL); - } - return hashmap__add(bpf_map_hash, map, priv); -} - -static int -bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term) -{ - op->key_type = BPF_MAP_KEY_ALL; - if (!term) - return 0; - - return 0; -} - -static struct bpf_map_op * -bpf_map_op__new(struct parse_events_term *term) -{ - struct bpf_map_op *op; - int err; - - op = zalloc(sizeof(*op)); - if (!op) { - pr_debug("Failed to alloc bpf_map_op\n"); - return ERR_PTR(-ENOMEM); - } - INIT_LIST_HEAD(&op->list); - - err = bpf_map_op_setkey(op, term); - if (err) { - free(op); - return ERR_PTR(err); - } - return op; -} - -static struct bpf_map_op * -bpf_map_op__clone(struct bpf_map_op *op) -{ - struct bpf_map_op *newop; - - newop = memdup(op, sizeof(*op)); - if (!newop) { - pr_debug("Failed to alloc bpf_map_op\n"); - return NULL; - } - - INIT_LIST_HEAD(&newop->list); - return newop; -} - -static struct bpf_map_priv * -bpf_map_priv__clone(struct bpf_map_priv *priv) -{ - struct bpf_map_priv *newpriv; - struct bpf_map_op *pos, *newop; - - newpriv = zalloc(sizeof(*newpriv)); - if (!newpriv) { - pr_debug("Not enough memory to alloc map private\n"); - return NULL; - } - INIT_LIST_HEAD(&newpriv->ops_list); - - list_for_each_entry(pos, &priv->ops_list, list) { - newop = bpf_map_op__clone(pos); - if (!newop) { - bpf_map_priv__purge(newpriv); - return NULL; - } - list_add_tail(&newop->list, &newpriv->ops_list); - } - - return newpriv; -} - -static int -bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) -{ - const char *map_name = bpf_map__name(map); - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) { - pr_debug("Failed to get private from map %s\n", map_name); - return PTR_ERR(priv); - } - - if (!priv) { - priv = zalloc(sizeof(*priv)); - if (!priv) { - pr_debug("Not enough memory to alloc map 
private\n"); - return -ENOMEM; - } - INIT_LIST_HEAD(&priv->ops_list); - - if (map_set_priv(map, priv)) { - free(priv); - return -BPF_LOADER_ERRNO__INTERNAL; - } - } - - list_add_tail(&op->list, &priv->ops_list); - return 0; -} - -static struct bpf_map_op * -bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term) -{ - struct bpf_map_op *op; - int err; - - op = bpf_map_op__new(term); - if (IS_ERR(op)) - return op; - - err = bpf_map__add_op(map, op); - if (err) { - bpf_map_op__delete(op); - return ERR_PTR(err); - } - return op; -} - -static int -__bpf_map__config_value(struct bpf_map *map, - struct parse_events_term *term) -{ - struct bpf_map_op *op; - const char *map_name = bpf_map__name(map); - - if (!map) { - pr_debug("Map '%s' is invalid\n", map_name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) { - pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", - map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; - } - if (bpf_map__key_size(map) < sizeof(unsigned int)) { - pr_debug("Map %s has incorrect key size\n", map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; - } - switch (bpf_map__value_size(map)) { - case 1: - case 2: - case 4: - case 8: - break; - default: - pr_debug("Map %s has incorrect value size\n", map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; - } - - op = bpf_map__add_newop(map, term); - if (IS_ERR(op)) - return PTR_ERR(op); - op->op_type = BPF_MAP_OP_SET_VALUE; - op->v.value = term->val.num; - return 0; -} - -static int -bpf_map__config_value(struct bpf_map *map, - struct parse_events_term *term, - struct evlist *evlist __maybe_unused) -{ - if (!term->err_val) { - pr_debug("Config value not set\n"); - return -BPF_LOADER_ERRNO__OBJCONF_CONF; - } - - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) { - pr_debug("ERROR: wrong value type for 'value'\n"); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; - } - - return __bpf_map__config_value(map, term); -} - -static int 
-__bpf_map__config_event(struct bpf_map *map, - struct parse_events_term *term, - struct evlist *evlist) -{ - struct bpf_map_op *op; - const char *map_name = bpf_map__name(map); - struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str); - - if (!evsel) { - pr_debug("Event (for '%s') '%s' doesn't exist\n", - map_name, term->val.str); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; - } - - if (!map) { - pr_debug("Map '%s' is invalid\n", map_name); - return PTR_ERR(map); - } - - /* - * No need to check key_size and value_size: - * kernel has already checked them. - */ - if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { - pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", - map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; - } - - op = bpf_map__add_newop(map, term); - if (IS_ERR(op)) - return PTR_ERR(op); - op->op_type = BPF_MAP_OP_SET_EVSEL; - op->v.evsel = evsel; - return 0; -} - -static int -bpf_map__config_event(struct bpf_map *map, - struct parse_events_term *term, - struct evlist *evlist) -{ - if (!term->err_val) { - pr_debug("Config value not set\n"); - return -BPF_LOADER_ERRNO__OBJCONF_CONF; - } - - if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) { - pr_debug("ERROR: wrong value type for 'event'\n"); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; - } - - return __bpf_map__config_event(map, term, evlist); -} - -struct bpf_obj_config__map_func { - const char *config_opt; - int (*config_func)(struct bpf_map *, struct parse_events_term *, - struct evlist *); -}; - -struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { - {"value", bpf_map__config_value}, - {"event", bpf_map__config_event}, -}; - -static int -bpf__obj_config_map(struct bpf_object *obj, - struct parse_events_term *term, - struct evlist *evlist, - int *key_scan_pos) -{ - /* key is "map:." 
*/ - char *map_name = strdup(term->config + sizeof("map:") - 1); - struct bpf_map *map; - int err = -BPF_LOADER_ERRNO__OBJCONF_OPT; - char *map_opt; - size_t i; - - if (!map_name) - return -ENOMEM; - - map_opt = strchr(map_name, '.'); - if (!map_opt) { - pr_debug("ERROR: Invalid map config: %s\n", map_name); - goto out; - } - - *map_opt++ = '\0'; - if (*map_opt == '\0') { - pr_debug("ERROR: Invalid map option: %s\n", term->config); - goto out; - } - - map = bpf_object__find_map_by_name(obj, map_name); - if (!map) { - pr_debug("ERROR: Map %s doesn't exist\n", map_name); - err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST; - goto out; - } - - for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) { - struct bpf_obj_config__map_func *func = - &bpf_obj_config__map_funcs[i]; - - if (strcmp(map_opt, func->config_opt) == 0) { - err = func->config_func(map, term, evlist); - goto out; - } - } - - pr_debug("ERROR: Invalid map config option '%s'\n", map_opt); - err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT; -out: - if (!err) - *key_scan_pos += strlen(map_opt); - - free(map_name); - return err; -} - -int bpf__config_obj(struct bpf_object *obj, - struct parse_events_term *term, - struct evlist *evlist, - int *error_pos) -{ - int key_scan_pos = 0; - int err; - - if (!obj || !term || !term->config) - return -EINVAL; - - if (strstarts(term->config, "map:")) { - key_scan_pos = sizeof("map:") - 1; - err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos); - goto out; - } - err = -BPF_LOADER_ERRNO__OBJCONF_OPT; -out: - if (error_pos) - *error_pos = key_scan_pos; - return err; - -} - -typedef int (*map_config_func_t)(const char *name, int map_fd, - const struct bpf_map *map, - struct bpf_map_op *op, - void *pkey, void *arg); -static int -foreach_key_array_all(map_config_func_t func, - void *arg, const char *name, - int map_fd, const struct bpf_map *map, - struct bpf_map_op *op) -{ - unsigned int i; - int err; - - for (i = 0; i < bpf_map__max_entries(map); i++) { - err = func(name, 
map_fd, map, op, &i, arg); - if (err) { - pr_debug("ERROR: failed to insert value to %s[%u]\n", - name, i); - return err; - } - } - return 0; -} - - -static int -bpf_map_config_foreach_key(struct bpf_map *map, - map_config_func_t func, - void *arg) -{ - int err, map_fd, type; - struct bpf_map_op *op; - const char *name = bpf_map__name(map); - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) { - pr_debug("ERROR: failed to get private from map %s\n", name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - if (!priv || list_empty(&priv->ops_list)) { - pr_debug("INFO: nothing to config for map %s\n", name); - return 0; - } - - if (!map) { - pr_debug("Map '%s' is invalid\n", name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - map_fd = bpf_map__fd(map); - if (map_fd < 0) { - pr_debug("ERROR: failed to get fd from map %s\n", name); - return map_fd; - } - - type = bpf_map__type(map); - list_for_each_entry(op, &priv->ops_list, list) { - switch (type) { - case BPF_MAP_TYPE_ARRAY: - case BPF_MAP_TYPE_PERF_EVENT_ARRAY: - switch (op->key_type) { - case BPF_MAP_KEY_ALL: - err = foreach_key_array_all(func, arg, name, - map_fd, map, op); - break; - default: - pr_debug("ERROR: keytype for map '%s' invalid\n", - name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - if (err) - return err; - break; - default: - pr_debug("ERROR: type of '%s' incorrect\n", name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; - } - } - - return 0; -} - -static int -apply_config_value_for_key(int map_fd, void *pkey, - size_t val_size, u64 val) -{ - int err = 0; - - switch (val_size) { - case 1: { - u8 _val = (u8)(val); - err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); - break; - } - case 2: { - u16 _val = (u16)(val); - err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); - break; - } - case 4: { - u32 _val = (u32)(val); - err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); - break; - } - case 8: { - err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY); - break; - } - default: - 
pr_debug("ERROR: invalid value size\n"); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; - } - if (err && errno) - err = -errno; - return err; -} - -static int -apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, - struct evsel *evsel) -{ - struct xyarray *xy = evsel->core.fd; - struct perf_event_attr *attr; - unsigned int key, events; - bool check_pass = false; - int *evt_fd; - int err; - - if (!xy) { - pr_debug("ERROR: evsel not ready for map %s\n", name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (xy->row_size / xy->entry_size != 1) { - pr_debug("ERROR: Dimension of target event is incorrect for map %s\n", - name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM; - } - - attr = &evsel->core.attr; - if (attr->inherit) { - pr_debug("ERROR: Can't put inherit event into map %s\n", name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; - } - - if (evsel__is_bpf_output(evsel)) - check_pass = true; - if (attr->type == PERF_TYPE_RAW) - check_pass = true; - if (attr->type == PERF_TYPE_HARDWARE) - check_pass = true; - if (!check_pass) { - pr_debug("ERROR: Event type is wrong for map %s\n", name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE; - } - - events = xy->entries / (xy->row_size / xy->entry_size); - key = *((unsigned int *)pkey); - if (key >= events) { - pr_debug("ERROR: there is no event %d for map %s\n", - key, name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE; - } - evt_fd = xyarray__entry(xy, key, 0); - err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY); - if (err && errno) - err = -errno; - return err; -} - -static int -apply_obj_config_map_for_key(const char *name, int map_fd, - const struct bpf_map *map, - struct bpf_map_op *op, - void *pkey, void *arg __maybe_unused) -{ - int err; - - switch (op->op_type) { - case BPF_MAP_OP_SET_VALUE: - err = apply_config_value_for_key(map_fd, pkey, - bpf_map__value_size(map), - op->v.value); - break; - case BPF_MAP_OP_SET_EVSEL: - err = apply_config_evsel_for_key(name, map_fd, pkey, 
- op->v.evsel); - break; - default: - pr_debug("ERROR: unknown value type for '%s'\n", name); - err = -BPF_LOADER_ERRNO__INTERNAL; - } - return err; -} - -static int -apply_obj_config_map(struct bpf_map *map) -{ - return bpf_map_config_foreach_key(map, - apply_obj_config_map_for_key, - NULL); -} - -static int -apply_obj_config_object(struct bpf_object *obj) -{ - struct bpf_map *map; - int err; - - bpf_object__for_each_map(map, obj) { - err = apply_obj_config_map(map); - if (err) - return err; - } - return 0; -} - -int bpf__apply_obj_config(void) -{ - struct bpf_perf_object *perf_obj, *tmp; - int err; - - bpf_perf_object__for_each(perf_obj, tmp) { - err = apply_obj_config_object(perf_obj->obj); - if (err) - return err; - } - - return 0; -} - -#define bpf__perf_for_each_map(map, pobj, tmp) \ - bpf_perf_object__for_each(pobj, tmp) \ - bpf_object__for_each_map(map, pobj->obj) - -#define bpf__perf_for_each_map_named(map, pobj, pobjtmp, name) \ - bpf__perf_for_each_map(map, pobj, pobjtmp) \ - if (bpf_map__name(map) && (strcmp(name, bpf_map__name(map)) == 0)) - -struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name) -{ - struct bpf_map_priv *tmpl_priv = NULL; - struct bpf_perf_object *perf_obj, *tmp; - struct evsel *evsel = NULL; - struct bpf_map *map; - int err; - bool need_init = false; - - bpf__perf_for_each_map_named(map, perf_obj, tmp, name) { - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) - return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL); - - /* - * No need to check map type: type should have been - * verified by kernel. 
- */ - if (!need_init && !priv) - need_init = !priv; - if (!tmpl_priv && priv) - tmpl_priv = priv; - } - - if (!need_init) - return NULL; - - if (!tmpl_priv) { - char *event_definition = NULL; - - if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0) - return ERR_PTR(-ENOMEM); - - err = parse_event(evlist, event_definition); - free(event_definition); - - if (err) { - pr_debug("ERROR: failed to create the \"%s\" bpf-output event\n", name); - return ERR_PTR(-err); - } - - evsel = evlist__last(evlist); - } - - bpf__perf_for_each_map_named(map, perf_obj, tmp, name) { - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) - return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL); - if (priv) - continue; - - if (tmpl_priv) { - priv = bpf_map_priv__clone(tmpl_priv); - if (!priv) - return ERR_PTR(-ENOMEM); - - err = map_set_priv(map, priv); - if (err) { - bpf_map_priv__clear(map, priv); - return ERR_PTR(err); - } - } else if (evsel) { - struct bpf_map_op *op; - - op = bpf_map__add_newop(map, NULL); - if (IS_ERR(op)) - return ERR_CAST(op); - op->op_type = BPF_MAP_OP_SET_EVSEL; - op->v.evsel = evsel; - } - } - - return evsel; -} - -int bpf__setup_stdout(struct evlist *evlist) -{ - struct evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__"); - return PTR_ERR_OR_ZERO(evsel); -} - -#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) -#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) -#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) - -static const char *bpf_loader_strerror_table[NR_ERRNO] = { - [ERRCODE_OFFSET(CONFIG)] = "Invalid config string", - [ERRCODE_OFFSET(GROUP)] = "Invalid group name", - [ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string", - [ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error", - [ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet", - [ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string", - [ERRCODE_OFFSET(PROLOGUE)] 
= "Failed to generate prologue", - [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program", - [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue", - [ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object config option", - [ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (missing '=')", - [ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object map config option", - [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map doesn't exist", - [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map", - [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type", - [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size", - [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size", - [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting", - [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting", - [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large", - [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event", - [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map", - [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large", -}; - -static int -bpf_loader_strerror(int err, char *buf, size_t size) -{ - char sbuf[STRERR_BUFSIZE]; - const char *msg; - - if (!buf || !size) - return -1; - - err = err > 0 ? 
err : -err; - - if (err >= __LIBBPF_ERRNO__START) - return libbpf_strerror(err, buf, size); - - if (err >= __BPF_LOADER_ERRNO__START && err < __BPF_LOADER_ERRNO__END) { - msg = bpf_loader_strerror_table[ERRNO_OFFSET(err)]; - snprintf(buf, size, "%s", msg); - buf[size - 1] = '\0'; - return 0; - } - - if (err >= __BPF_LOADER_ERRNO__END) - snprintf(buf, size, "Unknown bpf loader error %d", err); - else - snprintf(buf, size, "%s", - str_error_r(err, sbuf, sizeof(sbuf))); - - buf[size - 1] = '\0'; - return -1; -} - -#define bpf__strerror_head(err, buf, size) \ - char sbuf[STRERR_BUFSIZE], *emsg;\ - if (!size)\ - return 0;\ - if (err < 0)\ - err = -err;\ - bpf_loader_strerror(err, sbuf, sizeof(sbuf));\ - emsg = sbuf;\ - switch (err) {\ - default:\ - scnprintf(buf, size, "%s", emsg);\ - break; - -#define bpf__strerror_entry(val, fmt...)\ - case val: {\ - scnprintf(buf, size, fmt);\ - break;\ - } - -#define bpf__strerror_end(buf, size)\ - }\ - buf[size - 1] = '\0'; - -int bpf__strerror_prepare_load(const char *filename, bool source, - int err, char *buf, size_t size) -{ - size_t n; - int ret; - - n = snprintf(buf, size, "Failed to load %s%s: ", - filename, source ? " from source" : ""); - if (n >= size) { - buf[size - 1] = '\0'; - return 0; - } - buf += n; - size -= n; - - ret = bpf_loader_strerror(err, buf, size); - buf[size - 1] = '\0'; - return ret; -} - -int bpf__strerror_probe(struct bpf_object *obj __maybe_unused, - int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - case BPF_LOADER_ERRNO__PROGCONF_TERM: { - scnprintf(buf, size, "%s (add -v to see detail)", emsg); - break; - } - bpf__strerror_entry(EEXIST, "Probe point exist. 
Try 'perf probe -d \"*\"' and set 'force=yes'"); - bpf__strerror_entry(EACCES, "You need to be root"); - bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0"); - bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file"); - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_load(struct bpf_object *obj, - int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - case LIBBPF_ERRNO__KVER: { - unsigned int obj_kver = bpf_object__kversion(obj); - unsigned int real_kver; - - if (fetch_kernel_version(&real_kver, NULL, 0)) { - scnprintf(buf, size, "Unable to fetch kernel version"); - break; - } - - if (obj_kver != real_kver) { - scnprintf(buf, size, - "'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")", - KVER_PARAM(obj_kver), - KVER_PARAM(real_kver)); - break; - } - - scnprintf(buf, size, "Failed to load program for unknown reason"); - break; - } - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, - struct parse_events_term *term __maybe_unused, - struct evlist *evlist __maybe_unused, - int *error_pos __maybe_unused, int err, - char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, - "Can't use this config term with this map type"); - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, - "Cannot set event to BPF map in multi-thread tracing"); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, - "%s (Hint: use -i to turn off inherit)", emsg); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, - "Can only put raw, hardware and BPF output event into a BPF map"); - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_setup_output_event(struct 
evlist *evlist __maybe_unused, - int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - bpf__strerror_end(buf, size); - return 0; -} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h deleted file mode 100644 index 5d1c725cea29..000000000000 --- a/tools/perf/util/bpf-loader.h +++ /dev/null @@ -1,216 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, Wang Nan - * Copyright (C) 2015, Huawei Inc. - */ -#ifndef __BPF_LOADER_H -#define __BPF_LOADER_H - -#include -#include - -#ifdef HAVE_LIBBPF_SUPPORT -#include - -enum bpf_loader_errno { - __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, - /* Invalid config string */ - BPF_LOADER_ERRNO__CONFIG = __BPF_LOADER_ERRNO__START, - BPF_LOADER_ERRNO__GROUP, /* Invalid group name */ - BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */ - BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */ - BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */ - BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */ - BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */ - BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */ - BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */ - BPF_LOADER_ERRNO__OBJCONF_OPT, /* Invalid object config option */ - BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (lost '=')) */ - BPF_LOADER_ERRNO__OBJCONF_MAP_OPT, /* Invalid object map config option */ - BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map not exist */ - BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE, /* Incorrect value type for map */ - BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */ - BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */ - BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */ - BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */ - BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event 
setting */ - BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */ - BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */ - BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */ - BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ - __BPF_LOADER_ERRNO__END, -}; -#endif // HAVE_LIBBPF_SUPPORT - -struct evsel; -struct evlist; -struct bpf_object; -struct parse_events_term; -#define PERF_BPF_PROBE_GROUP "perf_bpf_probe" - -typedef int (*bpf_prog_iter_callback_t)(const char *group, const char *event, - int fd, struct bpf_object *obj, void *arg); - -#ifdef HAVE_LIBBPF_SUPPORT -struct bpf_object *bpf__prepare_load(const char *filename, bool source); -int bpf__strerror_prepare_load(const char *filename, bool source, - int err, char *buf, size_t size); - -struct bpf_object *bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, - const char *name); - -void bpf__clear(void); - -int bpf__probe(struct bpf_object *obj); -int bpf__unprobe(struct bpf_object *obj); -int bpf__strerror_probe(struct bpf_object *obj, int err, - char *buf, size_t size); - -int bpf__load(struct bpf_object *obj); -int bpf__strerror_load(struct bpf_object *obj, int err, - char *buf, size_t size); -int bpf__foreach_event(struct bpf_object *obj, - bpf_prog_iter_callback_t func, void *arg); - -int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term, - struct evlist *evlist, int *error_pos); -int bpf__strerror_config_obj(struct bpf_object *obj, - struct parse_events_term *term, - struct evlist *evlist, - int *error_pos, int err, char *buf, - size_t size); -int bpf__apply_obj_config(void); -int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); - -int bpf__setup_stdout(struct evlist *evlist); -struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name); -int bpf__strerror_setup_output_event(struct evlist *evlist, int err, char *buf, size_t size); -#else -#include -#include -#include "debug.h" 
- -static inline struct bpf_object * -bpf__prepare_load(const char *filename __maybe_unused, - bool source __maybe_unused) -{ - pr_debug("ERROR: eBPF object loading is disabled during compiling.\n"); - return ERR_PTR(-ENOTSUP); -} - -static inline struct bpf_object * -bpf__prepare_load_buffer(void *obj_buf __maybe_unused, - size_t obj_buf_sz __maybe_unused) -{ - return ERR_PTR(-ENOTSUP); -} - -static inline void bpf__clear(void) { } - -static inline int bpf__probe(struct bpf_object *obj __maybe_unused) { return 0;} -static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0;} -static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; } - -static inline int -bpf__foreach_event(struct bpf_object *obj __maybe_unused, - bpf_prog_iter_callback_t func __maybe_unused, - void *arg __maybe_unused) -{ - return 0; -} - -static inline int -bpf__config_obj(struct bpf_object *obj __maybe_unused, - struct parse_events_term *term __maybe_unused, - struct evlist *evlist __maybe_unused, - int *error_pos __maybe_unused) -{ - return 0; -} - -static inline int -bpf__apply_obj_config(void) -{ - return 0; -} - -static inline int -bpf__setup_stdout(struct evlist *evlist __maybe_unused) -{ - return 0; -} - -static inline struct evsel * -bpf__setup_output_event(struct evlist *evlist __maybe_unused, const char *name __maybe_unused) -{ - return NULL; -} - -static inline int -__bpf_strerror(char *buf, size_t size) -{ - if (!size) - return 0; - strncpy(buf, - "ERROR: eBPF object loading is disabled during compiling.\n", - size); - buf[size - 1] = '\0'; - return 0; -} - -static inline -int bpf__strerror_prepare_load(const char *filename __maybe_unused, - bool source __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_probe(struct bpf_object *obj __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - 
-static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, - struct parse_events_term *term __maybe_unused, - struct evlist *evlist __maybe_unused, - int *error_pos __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_apply_obj_config(int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_setup_output_event(struct evlist *evlist __maybe_unused, - int err __maybe_unused, char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -#endif - -static inline int bpf__strerror_setup_stdout(struct evlist *evlist, int err, char *buf, size_t size) -{ - return bpf__strerror_setup_output_event(evlist, err, buf, size); -} -#endif diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 46f144c46827..7a650de0db83 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -16,7 +16,6 @@ #include #include "util/event.h" /* proc_map_timeout */ #include "util/hist.h" /* perf_hist_config */ -#include "util/llvm-utils.h" /* perf_llvm_config */ #include "util/stat.h" /* perf_stat__set_big_num */ #include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */ #include "util/srcline.h" /* addr2line_timeout_ms */ @@ -486,9 +485,6 @@ int perf_default_config(const char *var, const char *value, if (strstarts(var, "call-graph.")) return perf_callchain_config(var, value); - if (strstarts(var, "llvm.")) - return perf_llvm_config(var, value); - if (strstarts(var, "buildid.")) return perf_buildid_config(var, value); diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c deleted file mode 100644 index c6c9c2228578..000000000000 --- a/tools/perf/util/llvm-utils.c +++ /dev/null @@ 
-1,612 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2015, Wang Nan - * Copyright (C) 2015, Huawei Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "debug.h" -#include "llvm-utils.h" -#include "config.h" -#include "util.h" -#include -#include - -#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ - "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ - "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ - "-Wno-unused-value -Wno-pointer-sign " \ - "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE" - -struct llvm_param llvm_param = { - .clang_path = "clang", - .llc_path = "llc", - .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE, - .clang_opt = NULL, - .opts = NULL, - .kbuild_dir = NULL, - .kbuild_opts = NULL, - .user_set_param = false, -}; - -static void version_notice(void); - -int perf_llvm_config(const char *var, const char *value) -{ - if (!strstarts(var, "llvm.")) - return 0; - var += sizeof("llvm.") - 1; - - if (!strcmp(var, "clang-path")) - llvm_param.clang_path = strdup(value); - else if (!strcmp(var, "clang-bpf-cmd-template")) - llvm_param.clang_bpf_cmd_template = strdup(value); - else if (!strcmp(var, "clang-opt")) - llvm_param.clang_opt = strdup(value); - else if (!strcmp(var, "kbuild-dir")) - llvm_param.kbuild_dir = strdup(value); - else if (!strcmp(var, "kbuild-opts")) - llvm_param.kbuild_opts = strdup(value); - else if (!strcmp(var, "dump-obj")) - llvm_param.dump_obj = !!perf_config_bool(var, value); - else if (!strcmp(var, "opts")) - llvm_param.opts = strdup(value); - else { - pr_debug("Invalid LLVM config option: %s\n", value); - return -1; - } - llvm_param.user_set_param = true; - return 0; -} - -static int -search_program(const char *def, const char *name, - char *output) -{ - char *env, *path, *tmp = NULL; - char buf[PATH_MAX]; - int ret; - - 
output[0] = '\0'; - if (def && def[0] != '\0') { - if (def[0] == '/') { - if (access(def, F_OK) == 0) { - strlcpy(output, def, PATH_MAX); - return 0; - } - } else if (def[0] != '\0') - name = def; - } - - env = getenv("PATH"); - if (!env) - return -1; - env = strdup(env); - if (!env) - return -1; - - ret = -ENOENT; - path = strtok_r(env, ":", &tmp); - while (path) { - scnprintf(buf, sizeof(buf), "%s/%s", path, name); - if (access(buf, F_OK) == 0) { - strlcpy(output, buf, PATH_MAX); - ret = 0; - break; - } - path = strtok_r(NULL, ":", &tmp); - } - - free(env); - return ret; -} - -static int search_program_and_warn(const char *def, const char *name, - char *output) -{ - int ret = search_program(def, name, output); - - if (ret) { - pr_err("ERROR:\tunable to find %s.\n" - "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" - " \tand '%s-path' option in [llvm] section of ~/.perfconfig.\n", - name, name); - version_notice(); - } - return ret; -} - -#define READ_SIZE 4096 -static int -read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) -{ - int err = 0; - void *buf = NULL; - FILE *file = NULL; - size_t read_sz = 0, buf_sz = 0; - char serr[STRERR_BUFSIZE]; - - file = popen(cmd, "r"); - if (!file) { - pr_err("ERROR: unable to popen cmd: %s\n", - str_error_r(errno, serr, sizeof(serr))); - return -EINVAL; - } - - while (!feof(file) && !ferror(file)) { - /* - * Make buf_sz always have obe byte extra space so we - * can put '\0' there. 
- */ - if (buf_sz - read_sz < READ_SIZE + 1) { - void *new_buf; - - buf_sz = read_sz + READ_SIZE + 1; - new_buf = realloc(buf, buf_sz); - - if (!new_buf) { - pr_err("ERROR: failed to realloc memory\n"); - err = -ENOMEM; - goto errout; - } - - buf = new_buf; - } - read_sz += fread(buf + read_sz, 1, READ_SIZE, file); - } - - if (buf_sz - read_sz < 1) { - pr_err("ERROR: internal error\n"); - err = -EINVAL; - goto errout; - } - - if (ferror(file)) { - pr_err("ERROR: error occurred when reading from pipe: %s\n", - str_error_r(errno, serr, sizeof(serr))); - err = -EIO; - goto errout; - } - - err = WEXITSTATUS(pclose(file)); - file = NULL; - if (err) { - err = -EINVAL; - goto errout; - } - - /* - * If buf is string, give it terminal '\0' to make our life - * easier. If buf is not string, that '\0' is out of space - * indicated by read_sz so caller won't even notice it. - */ - ((char *)buf)[read_sz] = '\0'; - - if (!p_buf) - free(buf); - else - *p_buf = buf; - - if (p_read_sz) - *p_read_sz = read_sz; - return 0; - -errout: - if (file) - pclose(file); - free(buf); - if (p_buf) - *p_buf = NULL; - if (p_read_sz) - *p_read_sz = 0; - return err; -} - -static inline void -force_set_env(const char *var, const char *value) -{ - if (value) { - setenv(var, value, 1); - pr_debug("set env: %s=%s\n", var, value); - } else { - unsetenv(var); - pr_debug("unset env: %s\n", var); - } -} - -static void -version_notice(void) -{ - pr_err( -" \tLLVM 3.7 or newer is required. 
Which can be found from http://llvm.org\n" -" \tYou may want to try git trunk:\n" -" \t\tgit clone http://llvm.org/git/llvm.git\n" -" \t\t and\n" -" \t\tgit clone http://llvm.org/git/clang.git\n\n" -" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n" -" \tdebian/ubuntu:\n" -" \t\thttps://apt.llvm.org/\n\n" -" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" -" \toption in [llvm] section of ~/.perfconfig to:\n\n" -" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n" -" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" -" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" -" \t(Replace /path/to/llc with path to your llc)\n\n" -); -} - -static int detect_kbuild_dir(char **kbuild_dir) -{ - const char *test_dir = llvm_param.kbuild_dir; - const char *prefix_dir = ""; - const char *suffix_dir = ""; - - /* _UTSNAME_LENGTH is 65 */ - char release[128]; - - char *autoconf_path; - - int err; - - if (!test_dir) { - err = fetch_kernel_version(NULL, release, - sizeof(release)); - if (err) - return -EINVAL; - - test_dir = release; - prefix_dir = "/lib/modules/"; - suffix_dir = "/build"; - } - - err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h", - prefix_dir, test_dir, suffix_dir); - if (err < 0) - return -ENOMEM; - - if (access(autoconf_path, R_OK) == 0) { - free(autoconf_path); - - err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir, - suffix_dir); - if (err < 0) - return -ENOMEM; - return 0; - } - pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n", - __func__, autoconf_path); - free(autoconf_path); - return -ENOENT; -} - -static const char *kinc_fetch_script = -"#!/usr/bin/env sh\n" -"if ! test -d \"$KBUILD_DIR\"\n" -"then\n" -" exit 1\n" -"fi\n" -"if ! 
test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" -"then\n" -" exit 1\n" -"fi\n" -"TMPDIR=`mktemp -d`\n" -"if test -z \"$TMPDIR\"\n" -"then\n" -" exit 1\n" -"fi\n" -"cat << EOF > $TMPDIR/Makefile\n" -"obj-y := dummy.o\n" -"\\$(obj)/%.o: \\$(src)/%.c\n" -"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n" -"\t\\$(CC) -c -o \\$@ \\$<\n" -"EOF\n" -"touch $TMPDIR/dummy.c\n" -"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n" -"RET=$?\n" -"rm -rf $TMPDIR\n" -"exit $RET\n"; - -void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) -{ - static char *saved_kbuild_dir; - static char *saved_kbuild_include_opts; - int err; - - if (!kbuild_dir || !kbuild_include_opts) - return; - - *kbuild_dir = NULL; - *kbuild_include_opts = NULL; - - if (saved_kbuild_dir && saved_kbuild_include_opts && - !IS_ERR(saved_kbuild_dir) && !IS_ERR(saved_kbuild_include_opts)) { - *kbuild_dir = strdup(saved_kbuild_dir); - *kbuild_include_opts = strdup(saved_kbuild_include_opts); - - if (*kbuild_dir && *kbuild_include_opts) - return; - - zfree(kbuild_dir); - zfree(kbuild_include_opts); - /* - * Don't fall through: it may breaks saved_kbuild_dir and - * saved_kbuild_include_opts if detect them again when - * memory is low. 
- */ - return; - } - - if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) { - pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n"); - pr_debug("Skip kbuild options detection.\n"); - goto errout; - } - - err = detect_kbuild_dir(kbuild_dir); - if (err) { - pr_warning( -"WARNING:\tunable to get correct kernel building directory.\n" -"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n" -" \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n" -" \tdetection.\n\n"); - goto errout; - } - - pr_debug("Kernel build dir is set to %s\n", *kbuild_dir); - force_set_env("KBUILD_DIR", *kbuild_dir); - force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts); - err = read_from_pipe(kinc_fetch_script, - (void **)kbuild_include_opts, - NULL); - if (err) { - pr_warning( -"WARNING:\tunable to get kernel include directories from '%s'\n" -"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n" -" \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n" -" \toption in [llvm] to \"\" to suppress this detection.\n\n", - *kbuild_dir); - - zfree(kbuild_dir); - goto errout; - } - - pr_debug("include option is set to %s\n", *kbuild_include_opts); - - saved_kbuild_dir = strdup(*kbuild_dir); - saved_kbuild_include_opts = strdup(*kbuild_include_opts); - - if (!saved_kbuild_dir || !saved_kbuild_include_opts) { - zfree(&saved_kbuild_dir); - zfree(&saved_kbuild_include_opts); - } - return; -errout: - saved_kbuild_dir = ERR_PTR(-EINVAL); - saved_kbuild_include_opts = ERR_PTR(-EINVAL); -} - -int llvm__get_nr_cpus(void) -{ - static int nr_cpus_avail = 0; - char serr[STRERR_BUFSIZE]; - - if (nr_cpus_avail > 0) - return nr_cpus_avail; - - nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); - if (nr_cpus_avail <= 0) { - pr_err( -"WARNING:\tunable to get available CPUs in this system: %s\n" -" \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr))); - nr_cpus_avail = 128; - } - return nr_cpus_avail; -} - -void llvm__dump_obj(const char 
*path, void *obj_buf, size_t size) -{ - char *obj_path = strdup(path); - FILE *fp; - char *p; - - if (!obj_path) { - pr_warning("WARNING: Not enough memory, skip object dumping\n"); - return; - } - - p = strrchr(obj_path, '.'); - if (!p || (strcmp(p, ".c") != 0)) { - pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n", - obj_path); - goto out; - } - - p[1] = 'o'; - fp = fopen(obj_path, "wb"); - if (!fp) { - pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n", - obj_path, strerror(errno)); - goto out; - } - - pr_debug("LLVM: dumping %s\n", obj_path); - if (fwrite(obj_buf, size, 1, fp) != 1) - pr_debug("WARNING: failed to write to file '%s': %s, skip object dumping\n", obj_path, strerror(errno)); - fclose(fp); -out: - free(obj_path); -} - -int llvm__compile_bpf(const char *path, void **p_obj_buf, - size_t *p_obj_buf_sz) -{ - size_t obj_buf_sz; - void *obj_buf = NULL; - int err, nr_cpus_avail; - unsigned int kernel_version; - char linux_version_code_str[64]; - const char *clang_opt = llvm_param.clang_opt; - char clang_path[PATH_MAX], llc_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; - char serr[STRERR_BUFSIZE]; - char *kbuild_dir = NULL, *kbuild_include_opts = NULL, - *perf_bpf_include_opts = NULL; - const char *template = llvm_param.clang_bpf_cmd_template; - char *pipe_template = NULL; - const char *opts = llvm_param.opts; - char *command_echo = NULL, *command_out; - char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR); - - if (path[0] != '-' && realpath(path, abspath) == NULL) { - err = errno; - pr_err("ERROR: problems with path %s: %s\n", - path, str_error_r(err, serr, sizeof(serr))); - return -err; - } - - if (!template) - template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; - - err = search_program_and_warn(llvm_param.clang_path, - "clang", clang_path); - if (err) - return -ENOENT; - - /* - * This is an optional work. Even it fail we can continue our - * work. Needn't check error return. 
- */ - llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); - - nr_cpus_avail = llvm__get_nr_cpus(); - snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", - nr_cpus_avail); - - if (fetch_kernel_version(&kernel_version, NULL, 0)) - kernel_version = 0; - - snprintf(linux_version_code_str, sizeof(linux_version_code_str), - "0x%x", kernel_version); - if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0) - goto errout; - force_set_env("NR_CPUS", nr_cpus_avail_str); - force_set_env("LINUX_VERSION_CODE", linux_version_code_str); - force_set_env("CLANG_EXEC", clang_path); - force_set_env("CLANG_OPTIONS", clang_opt); - force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); - force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts); - force_set_env("WORKING_DIR", kbuild_dir ? : "."); - - if (opts) { - err = search_program_and_warn(llvm_param.llc_path, "llc", llc_path); - if (err) - goto errout; - - err = -ENOMEM; - if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -", - template, llc_path, opts) < 0) { - pr_err("ERROR:\tnot enough memory to setup command line\n"); - goto errout; - } - - template = pipe_template; - - } - - /* - * Since we may reset clang's working dir, path of source file - * should be transferred into absolute path, except we want - * stdin to be source file (testing). - */ - force_set_env("CLANG_SOURCE", - (path[0] == '-') ? path : abspath); - - pr_debug("llvm compiling command template: %s\n", template); - - /* - * Below, substitute control characters for values that can cause the - * echo to misbehave, then substitute the values back. 
- */ - err = -ENOMEM; - if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0) - goto errout; - -#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0) - for (char *p = command_echo; *p; p++) { - SWAP_CHAR('<', '\001'); - SWAP_CHAR('>', '\002'); - SWAP_CHAR('"', '\003'); - SWAP_CHAR('\'', '\004'); - SWAP_CHAR('|', '\005'); - SWAP_CHAR('&', '\006'); - SWAP_CHAR('\a', '"'); - } - err = read_from_pipe(command_echo, (void **) &command_out, NULL); - if (err) - goto errout; - - for (char *p = command_out; *p; p++) { - SWAP_CHAR('\001', '<'); - SWAP_CHAR('\002', '>'); - SWAP_CHAR('\003', '"'); - SWAP_CHAR('\004', '\''); - SWAP_CHAR('\005', '|'); - SWAP_CHAR('\006', '&'); - } -#undef SWAP_CHAR - pr_debug("llvm compiling command : %s\n", command_out); - - err = read_from_pipe(template, &obj_buf, &obj_buf_sz); - if (err) { - pr_err("ERROR:\tunable to compile %s\n", path); - pr_err("Hint:\tCheck error message shown above.\n"); - pr_err("Hint:\tYou can also pre-compile it into .o using:\n"); - pr_err(" \t\tclang --target=bpf -O2 -c %s\n", path); - pr_err(" \twith proper -I and -D options.\n"); - goto errout; - } - - free(command_echo); - free(command_out); - free(kbuild_dir); - free(kbuild_include_opts); - free(perf_bpf_include_opts); - free(libbpf_include_dir); - - if (!p_obj_buf) - free(obj_buf); - else - *p_obj_buf = obj_buf; - - if (p_obj_buf_sz) - *p_obj_buf_sz = obj_buf_sz; - return 0; -errout: - free(command_echo); - free(kbuild_dir); - free(kbuild_include_opts); - free(obj_buf); - free(perf_bpf_include_opts); - free(libbpf_include_dir); - free(pipe_template); - if (p_obj_buf) - *p_obj_buf = NULL; - if (p_obj_buf_sz) - *p_obj_buf_sz = 0; - return err; -} - -int llvm__search_clang(void) -{ - char clang_path[PATH_MAX]; - - return search_program_and_warn(llvm_param.clang_path, "clang", clang_path); -} diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h deleted file mode 100644 index 7878a0e3fa98..000000000000 --- 
a/tools/perf/util/llvm-utils.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, Wang Nan - * Copyright (C) 2015, Huawei Inc. - */ -#ifndef __LLVM_UTILS_H -#define __LLVM_UTILS_H - -#include - -struct llvm_param { - /* Path of clang executable */ - const char *clang_path; - /* Path of llc executable */ - const char *llc_path; - /* - * Template of clang bpf compiling. 5 env variables - * can be used: - * $CLANG_EXEC: Path to clang. - * $CLANG_OPTIONS: Extra options to clang. - * $KERNEL_INC_OPTIONS: Kernel include directories. - * $WORKING_DIR: Kernel source directory. - * $CLANG_SOURCE: Source file to be compiled. - */ - const char *clang_bpf_cmd_template; - /* Will be filled in $CLANG_OPTIONS */ - const char *clang_opt; - /* - * If present it'll add -emit-llvm to $CLANG_OPTIONS to pipe - * the clang output to llc, useful for new llvm options not - * yet selectable via 'clang -mllvm option', such as -mattr=dwarfris - * in clang 6.0/llvm 7 - */ - const char *opts; - /* Where to find kbuild system */ - const char *kbuild_dir; - /* - * Arguments passed to make, like 'ARCH=arm' if doing cross - * compiling. Should not be used for dynamic compiling. - */ - const char *kbuild_opts; - /* - * Default is false. If set to true, write compiling result - * to object file. - */ - bool dump_obj; - /* - * Default is false. If one of the above fields is set by user - * explicitly then user_set_llvm is set to true. This is used - * for perf test. If user doesn't set anything in .perfconfig - * and clang is not found, don't trigger llvm test. 
- */ - bool user_set_param; -}; - -extern struct llvm_param llvm_param; -int perf_llvm_config(const char *var, const char *value); - -int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz); - -/* This function is for test__llvm() use only */ -int llvm__search_clang(void); - -/* Following functions are reused by builtin clang support */ -void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts); -int llvm__get_nr_cpus(void); - -void llvm__dump_obj(const char *path, void *obj_buf, size_t size); -#endif diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 0b5075ef00c8..00a8ec94f5b2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -14,7 +14,6 @@ #include "parse-events.h" #include "string2.h" #include "strlist.h" -#include "bpf-loader.h" #include "debug.h" #include #include @@ -648,272 +647,6 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, } #endif /* HAVE_LIBTRACEEVENT */ -#ifdef HAVE_LIBBPF_SUPPORT -struct __add_bpf_event_param { - struct parse_events_state *parse_state; - struct list_head *list; - struct list_head *head_config; - YYLTYPE *loc; -}; - -static int add_bpf_event(const char *group, const char *event, int fd, struct bpf_object *obj, - void *_param) -{ - LIST_HEAD(new_evsels); - struct __add_bpf_event_param *param = _param; - struct parse_events_state *parse_state = param->parse_state; - struct list_head *list = param->list; - struct evsel *pos; - int err; - /* - * Check if we should add the event, i.e. if it is a TP but starts with a '!', - * then don't add the tracepoint, this will be used for something else, like - * adding to a BPF_MAP_TYPE_PROG_ARRAY. 
- * - * See tools/perf/examples/bpf/augmented_raw_syscalls.c - */ - if (group[0] == '!') - return 0; - - pr_debug("add bpf event %s:%s and attach bpf program %d\n", - group, event, fd); - - err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group, - event, parse_state->error, - param->head_config, param->loc); - if (err) { - struct evsel *evsel, *tmp; - - pr_debug("Failed to add BPF event %s:%s\n", - group, event); - list_for_each_entry_safe(evsel, tmp, &new_evsels, core.node) { - list_del_init(&evsel->core.node); - evsel__delete(evsel); - } - return err; - } - pr_debug("adding %s:%s\n", group, event); - - list_for_each_entry(pos, &new_evsels, core.node) { - pr_debug("adding %s:%s to %p\n", - group, event, pos); - pos->bpf_fd = fd; - pos->bpf_obj = obj; - } - list_splice(&new_evsels, list); - return 0; -} - -int parse_events_load_bpf_obj(struct parse_events_state *parse_state, - struct list_head *list, - struct bpf_object *obj, - struct list_head *head_config, - void *loc) -{ - int err; - char errbuf[BUFSIZ]; - struct __add_bpf_event_param param = {parse_state, list, head_config, loc}; - static bool registered_unprobe_atexit = false; - YYLTYPE test_loc = {.first_column = -1}; - - if (IS_ERR(obj) || !obj) { - snprintf(errbuf, sizeof(errbuf), - "Internal error: load bpf obj with NULL"); - err = -EINVAL; - goto errout; - } - - /* - * Register atexit handler before calling bpf__probe() so - * bpf__probe() don't need to unprobe probe points its already - * created when failure. 
- */ - if (!registered_unprobe_atexit) { - atexit(bpf__clear); - registered_unprobe_atexit = true; - } - - err = bpf__probe(obj); - if (err) { - bpf__strerror_probe(obj, err, errbuf, sizeof(errbuf)); - goto errout; - } - - err = bpf__load(obj); - if (err) { - bpf__strerror_load(obj, err, errbuf, sizeof(errbuf)); - goto errout; - } - - if (!param.loc) - param.loc = &test_loc; - - err = bpf__foreach_event(obj, add_bpf_event, ¶m); - if (err) { - snprintf(errbuf, sizeof(errbuf), - "Attach events in BPF object failed"); - goto errout; - } - - return 0; -errout: - parse_events_error__handle(parse_state->error, param.loc ? param.loc->first_column : 0, - strdup(errbuf), strdup("(add -v to see detail)")); - return err; -} - -static int -parse_events_config_bpf(struct parse_events_state *parse_state, - struct bpf_object *obj, - struct list_head *head_config) -{ - struct parse_events_term *term; - int error_pos = 0; - - if (!head_config || list_empty(head_config)) - return 0; - - list_for_each_entry(term, head_config, list) { - int err; - - if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - parse_events_error__handle(parse_state->error, term->err_term, - strdup("Invalid config term for BPF object"), - NULL); - return -EINVAL; - } - - err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos); - if (err) { - char errbuf[BUFSIZ]; - int idx; - - bpf__strerror_config_obj(obj, term, parse_state->evlist, - &error_pos, err, errbuf, - sizeof(errbuf)); - - if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) - idx = term->err_val; - else - idx = term->err_term + error_pos; - - parse_events_error__handle(parse_state->error, idx, - strdup(errbuf), - NULL); - return err; - } - } - return 0; -} - -/* - * Split config terms: - * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ... - * 'call-graph=fp' is 'evt config', should be applied to each - * events in bpf.c. - * 'map:array.value[0]=1' is 'obj config', should be processed - * with parse_events_config_bpf. 
- * - * Move object config terms from the first list to obj_head_config. - */ -static void -split_bpf_config_terms(struct list_head *evt_head_config, - struct list_head *obj_head_config) -{ - struct parse_events_term *term, *temp; - - /* - * Currently, all possible user config term - * belong to bpf object. parse_events__is_hardcoded_term() - * happens to be a good flag. - * - * See parse_events_config_bpf() and - * config_term_tracepoint(). - */ - list_for_each_entry_safe(term, temp, evt_head_config, list) - if (!parse_events__is_hardcoded_term(term)) - list_move_tail(&term->list, obj_head_config); -} - -int parse_events_load_bpf(struct parse_events_state *parse_state, - struct list_head *list, - char *bpf_file_name, - bool source, - struct list_head *head_config, - void *loc_) -{ - int err; - struct bpf_object *obj; - LIST_HEAD(obj_head_config); - YYLTYPE *loc = loc_; - - if (head_config) - split_bpf_config_terms(head_config, &obj_head_config); - - obj = bpf__prepare_load(bpf_file_name, source); - if (IS_ERR(obj)) { - char errbuf[BUFSIZ]; - - err = PTR_ERR(obj); - - if (err == -ENOTSUP) - snprintf(errbuf, sizeof(errbuf), - "BPF support is not compiled"); - else - bpf__strerror_prepare_load(bpf_file_name, - source, - -err, errbuf, - sizeof(errbuf)); - - parse_events_error__handle(parse_state->error, loc->first_column, - strdup(errbuf), strdup("(add -v to see detail)")); - return err; - } - - err = parse_events_load_bpf_obj(parse_state, list, obj, head_config, loc); - if (err) - return err; - err = parse_events_config_bpf(parse_state, obj, &obj_head_config); - - /* - * Caller doesn't know anything about obj_head_config, - * so combine them together again before returning. 
- */ - if (head_config) - list_splice_tail(&obj_head_config, head_config); - return err; -} -#else // HAVE_LIBBPF_SUPPORT -int parse_events_load_bpf_obj(struct parse_events_state *parse_state, - struct list_head *list __maybe_unused, - struct bpf_object *obj __maybe_unused, - struct list_head *head_config __maybe_unused, - void *loc_) -{ - YYLTYPE *loc = loc_; - - parse_events_error__handle(parse_state->error, loc->first_column, - strdup("BPF support is not compiled"), - strdup("Make sure libbpf-devel is available at build time.")); - return -ENOTSUP; -} - -int parse_events_load_bpf(struct parse_events_state *parse_state, - struct list_head *list __maybe_unused, - char *bpf_file_name __maybe_unused, - bool source __maybe_unused, - struct list_head *head_config __maybe_unused, - void *loc_) -{ - YYLTYPE *loc = loc_; - - parse_events_error__handle(parse_state->error, loc->first_column, - strdup("BPF support is not compiled"), - strdup("Make sure libbpf-devel is available at build time.")); - return -ENOTSUP; -} -#endif // HAVE_LIBBPF_SUPPORT - static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) { @@ -2274,7 +2007,6 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte .list = LIST_HEAD_INIT(parse_state.list), .idx = evlist->core.nr_entries, .error = err, - .evlist = evlist, .stoken = PE_START_EVENTS, .fake_pmu = fake_pmu, .pmu_filter = pmu_filter, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b77ff619a623..411f69b2ac3a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -118,8 +118,6 @@ struct parse_events_state { int idx; /* Error information. */ struct parse_events_error *error; - /* Used by BPF event creation. */ - struct evlist *evlist; /* Holds returned terms for term parsing. */ struct list_head *terms; /* Start token. 
*/ @@ -160,19 +158,6 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, struct list_head *head_config, void *loc); -int parse_events_load_bpf(struct parse_events_state *parse_state, - struct list_head *list, - char *bpf_file_name, - bool source, - struct list_head *head_config, - void *loc); -/* Provide this function for perf test */ -struct bpf_object; -int parse_events_load_bpf_obj(struct parse_events_state *parse_state, - struct list_head *list, - struct bpf_object *obj, - struct list_head *head_config, - void *loc); int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index d7d084cc4140..1147084b2c76 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -68,31 +68,6 @@ static int lc_str(yyscan_t scanner, const struct parse_events_state *state) return str(scanner, state->match_legacy_cache_terms ? 
PE_LEGACY_CACHE : PE_NAME); } -static bool isbpf_suffix(char *text) -{ - int len = strlen(text); - - if (len < 2) - return false; - if ((text[len - 1] == 'c' || text[len - 1] == 'o') && - text[len - 2] == '.') - return true; - if (len > 4 && !strcmp(text + len - 4, ".obj")) - return true; - return false; -} - -static bool isbpf(yyscan_t scanner) -{ - char *text = parse_events_get_text(scanner); - struct stat st; - - if (!isbpf_suffix(text)) - return false; - - return stat(text, &st) == 0; -} - /* * This function is called when the parser gets two kind of input: * @@ -179,8 +154,6 @@ do { \ group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* event [^,{}/]+ -bpf_object [^,{}]+\.(o|bpf)[a-zA-Z0-9._]* -bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ @@ -233,8 +206,6 @@ non_digit [^0-9] } {event_pmu} | -{bpf_object} | -{bpf_source} | {event} { BEGIN(INITIAL); REWIND(1); @@ -363,8 +334,6 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } -{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } -{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } {name} { return str(yyscanner, PE_NAME); } {name_tag} { return str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index c3517e3498d7..00da1f8c0baf 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -60,7 +60,6 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME %token PE_RAW PE_NAME -%token PE_BPF_OBJECT PE_BPF_SOURCE %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH %token PE_LEGACY_CACHE %token PE_PREFIX_MEM @@ -75,8 +74,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type value_sym %type 
PE_RAW %type PE_NAME -%type PE_BPF_OBJECT -%type PE_BPF_SOURCE %type PE_LEGACY_CACHE %type PE_MODIFIER_EVENT %type PE_MODIFIER_BP @@ -97,7 +94,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type event_legacy_tracepoint %type event_legacy_numeric %type event_legacy_raw -%type event_bpf_file %type event_def %type event_mod %type event_name @@ -271,8 +267,7 @@ event_def: event_pmu | event_legacy_mem sep_dc | event_legacy_tracepoint sep_dc | event_legacy_numeric sep_dc | - event_legacy_raw sep_dc | - event_bpf_file + event_legacy_raw sep_dc event_pmu: PE_NAME opt_pmu_config @@ -620,43 +615,6 @@ PE_RAW opt_event_config $$ = list; } -event_bpf_file: -PE_BPF_OBJECT opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_load_bpf(parse_state, list, $1, false, $2, &@1); - parse_events_terms__delete($2); - free($1); - if (err) { - free(list); - PE_ABORT(err); - } - $$ = list; -} -| -PE_BPF_SOURCE opt_event_config -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_load_bpf(_parse_state, list, $1, true, $2, &@1); - parse_events_terms__delete($2); - if (err) { - free(list); - PE_ABORT(err); - } - $$ = list; -} - opt_event_config: '/' event_config '/' { -- cgit v1.2.3-70-g09d2 From 5e6da6be3082f77be06894a1a94d52a90b4007dc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Aug 2023 11:48:51 -0700 Subject: perf trace: Migrate BPF augmentation to use a skeleton Previously a BPF event of augmented_raw_syscalls.c could be used to enable augmentation of syscalls by perf trace. As BPF events are no longer supported, switch to using a BPF skeleton which when attached explicitly opens the sysenter and sysexit tracepoints. The dump map is removed as debugging wasn't supported by the augmentation and bpf_printk can be used when necessary. 
Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the rename/migration to a BPF skeleton captures that this was the source. Committer notes: Some minor stylistic changes to help visualizing the diff. Use libbpf_strerror when failing to load the augmented raw syscalls BPF. Use bpf_object__for_each_program(prog, trace.skel->obj) to disable auto attachment for all but the sys_enter, sys_exit tracepoints, to avoid having to add extra lines as we go adding support for more pointer receiving syscalls. Committer testing: # perf trace -e open* --max-events=10 0.000 ( 0.022 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 11 208.833 ( ): gnome-terminal/3223 openat(dfd: CWD, filename: "/proc/51250/cmdline") ... 249.993 ( 0.024 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 11 250.118 ( 0.030 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.pressure", flags: RDONLY|CLOEXEC) = 11 250.205 ( 0.016 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.current", flags: RDONLY|CLOEXEC) = 11 250.244 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.min", flags: RDONLY|CLOEXEC) = 11 250.282 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.low", flags: RDONLY|CLOEXEC) = 11 250.320 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.swap.current", flags: RDONLY|CLOEXEC) = 11 250.355 ( 0.014 ms): systemd-oomd/1151 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.stat", flags: RDONLY|CLOEXEC) = 11 250.717 ( 0.016 ms): systemd-oomd/1151 openat(dfd: CWD, filename: 
"/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/memory.pressure", flags: RDONLY|CLOEXEC) = 11 # # perf trace -e *nanosleep* --max-events=10 ? ( ): SCTP timer/28304 ... [continued]: clock_nanosleep()) = 0 0.007 (10.058 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0 10.069 ( ): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) ... 10.069 (10.056 ms): SCTP timer/28304 ... [continued]: clock_nanosleep()) = 0 17.059 ( ): podman/3572 nanosleep(rqtp: 0x7fc4f4d75be0) ... 17.059 (10.061 ms): podman/3572 ... [continued]: nanosleep()) = 0 20.131 (10.059 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0 30.195 (10.038 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0 40.238 (10.057 ms): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) = 0 50.301 ( ): SCTP timer/28304 clock_nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }, rmtp: 0x7f0466b78de0) ... 
# # perf trace -e perf_event* -- perf stat -e instructions,cycles,cache-misses sleep 0.1 0.000 ( 0.011 ms): perf/51331 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x1 (PERF_COUNT_HW_INSTRUCTIONS), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 51332 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 0.013 ( 0.003 ms): perf/51331 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0 (PERF_COUNT_HW_CPU_CYCLES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 51332 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4 0.017 ( 0.002 ms): perf/51331 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x3 (PERF_COUNT_HW_CACHE_MISSES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 51332 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5 Performance counter stats for 'sleep 0.1': 1,495,051 instructions # 1.11 insn per cycle 1,347,641 cycles 35,424 cache-misses 0.100935279 seconds time elapsed 0.000924000 seconds user 0.000000000 seconds sys # # perf trace -e connect* ssh localhost 0.000 ( 0.012 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 0.118 ( 0.004 ms): ssh/51346 connect(fd: 6, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 0.399 ( 0.007 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 0.426 ( 0.003 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection 
refused) 0.754 ( 0.009 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: INET, port: 22, addr: 127.0.0.1 }, addrlen: 16) = 0 0.771 ( 0.010 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: INET6, port: 22, addr: ::1 }, addrlen: 28) = 0 0.798 ( 0.053 ms): ssh/51346 connect(fd: 4, uservaddr: { .family: INET6, port: 22, addr: ::1 }, addrlen: 28) = 0 0.870 ( 0.004 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 0.904 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 0.930 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 0.957 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 0.981 ( 0.003 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 1.006 ( 0.004 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 1.036 ( 0.005 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/lib/sss/pipes/nss }, addrlen: 110) = -1 ECONNREFUSED (Connection refused) 65.077 ( 0.022 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/run/.heim_org.h5l.kcm-socket }, addrlen: 110) = 0 66.608 ( 0.014 ms): ssh/51346 connect(fd: 5, uservaddr: { .family: LOCAL, path: /var/run/.heim_org.h5l.kcm-socket }, addrlen: 110) = 0 root@localhost's password: # # perf trace -e sendto* ping -c 2 localhost PING localhost(localhost (::1)) 56 data bytes 64 bytes from localhost (::1): icmp_seq=1 ttl=64 time=0.024 ms 0.000 ( 0.011 ms): ping/51357 sendto(fd: 5, buff: 0x7ffcca35e620, len: 20, addr: { 
.family: NETLINK }, addr_len: 0xc) = 20 0.135 ( 0.026 ms): ping/51357 sendto(fd: 4, buff: 0x5601398f7b20, len: 64, addr: { .family: INET6, port: 58, addr: ::1 }, addr_len: 0x1c) = 64 1014.929 ( 0.050 ms): ping/51357 sendto(fd: 4, buff: 0x5601398f7b20, len: 64, flags: CONFIRM, addr: { .family: INET6, port: 58, addr: ::1 }, addr_len: 0x1c) = 64 64 bytes from localhost (::1): icmp_seq=2 ttl=64 time=0.046 ms --- localhost ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1015ms rtt min/avg/max/mdev = 0.024/0.035/0.046/0.011 ms # Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Anshuman Khandual Cc: Athira Rajeev Cc: Brendan Gregg Cc: Carsten Haitzler Cc: Eduard Zingerman Cc: Fangrui Song Cc: He Kuang Cc: Ingo Molnar Cc: James Clark Cc: Kan Liang Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Naveen N. Rao Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Tiezhu Yang Cc: Tom Rix Cc: Wang Nan Cc: Wang ShaoBo Cc: Yang Jihong Cc: Yonghong Song Cc: YueHaibing Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230810184853.2860737-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 1 + tools/perf/builtin-trace.c | 159 ++++---- tools/perf/examples/bpf/augmented_raw_syscalls.c | 417 -------------------- .../util/bpf_skel/augmented_raw_syscalls.bpf.c | 418 +++++++++++++++++++++ 4 files changed, 514 insertions(+), 481 deletions(-) delete mode 100644 tools/perf/examples/bpf/augmented_raw_syscalls.c create mode 100644 tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5370d7bf123e..40663c69b25c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1038,6 +1038,7 @@ SKELETONS += 
$(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h +SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): $(Q)$(MKDIR) -p $@ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 59862467e781..0ebfa95895e0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -19,6 +19,9 @@ #ifdef HAVE_LIBBPF_SUPPORT #include #include +#ifdef HAVE_BPF_SKEL +#include "bpf_skel/augmented_raw_syscalls.skel.h" +#endif #endif #include "util/bpf_map.h" #include "util/rlimit.h" @@ -127,25 +130,19 @@ struct trace { struct syscalltbl *sctbl; struct { struct syscall *table; - struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY - struct bpf_map *sys_enter, - *sys_exit; - } prog_array; struct { struct evsel *sys_enter, - *sys_exit, - *augmented; + *sys_exit, + *bpf_output; } events; - struct bpf_program *unaugmented_prog; } syscalls; - struct { - struct bpf_map *map; - } dump; +#ifdef HAVE_BPF_SKEL + struct augmented_raw_syscalls_bpf *skel; +#endif struct record_opts opts; struct evlist *evlist; struct machine *host; struct thread *current; - struct bpf_object *bpf_obj; struct cgroup *cgroup; u64 base_time; FILE *output; @@ -415,6 +412,7 @@ static int evsel__init_syscall_tp(struct evsel *evsel) if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") && evsel__init_tp_uint_field(evsel, &sc->id, "nr")) return -ENOENT; + return 0; } @@ -2845,7 +2843,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, if (thread) trace__fprintf_comm_tid(trace, thread, trace->output); - if (evsel == trace->syscalls.events.augmented) { + if (evsel == trace->syscalls.events.bpf_output) { int id = perf_evsel__sc_tp_uint(evsel, id, 
sample); struct syscall *sc = trace__syscall_info(trace, evsel, id); @@ -3278,24 +3276,16 @@ out_enomem: goto out; } -#ifdef HAVE_LIBBPF_SUPPORT -static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name) -{ - if (trace->bpf_obj == NULL) - return NULL; - - return bpf_object__find_map_by_name(trace->bpf_obj, name); -} - +#ifdef HAVE_BPF_SKEL static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { struct bpf_program *pos, *prog = NULL; const char *sec_name; - if (trace->bpf_obj == NULL) + if (trace->skel->obj == NULL) return NULL; - bpf_object__for_each_program(pos, trace->bpf_obj) { + bpf_object__for_each_program(pos, trace->skel->obj) { sec_name = bpf_program__section_name(pos); if (sec_name && !strcmp(sec_name, name)) { prog = pos; @@ -3313,12 +3303,12 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str if (prog_name == NULL) { char default_prog_name[256]; - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name); + scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->name); prog = trace__find_bpf_program_by_title(trace, default_prog_name); if (prog != NULL) goto out_found; if (sc->fmt && sc->fmt->alias) { - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias); + scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->fmt->alias); prog = trace__find_bpf_program_by_title(trace, default_prog_name); if (prog != NULL) goto out_found; @@ -3336,7 +3326,7 @@ out_found: pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n", prog_name, type, sc->name); out_unaugmented: - return trace->syscalls.unaugmented_prog; + return trace->skel->progs.syscall_unaugmented; } static void trace__init_syscall_bpf_progs(struct trace *trace, int id) @@ -3353,13 +3343,13 @@ static void 
trace__init_syscall_bpf_progs(struct trace *trace, int id) static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id) { struct syscall *sc = trace__syscall_info(trace, NULL, id); - return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog); + return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) { struct syscall *sc = trace__syscall_info(trace, NULL, id); - return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog); + return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) @@ -3384,7 +3374,7 @@ try_to_find_pair: bool is_candidate = false; if (pair == NULL || pair == sc || - pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog) + pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented) continue; for (field = sc->args, candidate_field = pair->args; @@ -3437,7 +3427,7 @@ try_to_find_pair: */ if (pair_prog == NULL) { pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? 
pair->fmt->bpf_prog_name.sys_enter : NULL, "enter"); - if (pair_prog == trace->syscalls.unaugmented_prog) + if (pair_prog == trace->skel->progs.syscall_unaugmented) goto next_candidate; } @@ -3452,8 +3442,8 @@ try_to_find_pair: static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) { - int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter), - map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit); + int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); + int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); int err = 0, key; for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { @@ -3515,7 +3505,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) * For now we're just reusing the sys_enter prog, and if it * already has an augmenter, we don't need to find one. */ - if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog) + if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented) continue; /* @@ -3538,22 +3528,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) break; } - return err; } - -#else // HAVE_LIBBPF_SUPPORT -static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused, - const char *name __maybe_unused) -{ - return NULL; -} - -static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused) -{ - return 0; -} -#endif // HAVE_LIBBPF_SUPPORT +#endif // HAVE_BPF_SKEL static int trace__set_ev_qualifier_filter(struct trace *trace) { @@ -3917,13 +3894,31 @@ static int trace__run(struct trace *trace, int argc, const char **argv) err = evlist__open(evlist); if (err < 0) goto out_error_open; +#ifdef HAVE_BPF_SKEL + { + struct perf_cpu cpu; + /* + * Set up the __augmented_syscalls__ BPF map to hold for each + * CPU the bpf-output event's file descriptor. 
+ */ + perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) { + bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__, + &cpu.cpu, sizeof(int), + xyarray__entry(trace->syscalls.events.bpf_output->core.fd, + cpu.cpu, 0), + sizeof(__u32), BPF_ANY); + } + } +#endif err = trace__set_filter_pids(trace); if (err < 0) goto out_error_mem; - if (trace->syscalls.prog_array.sys_enter) +#ifdef HAVE_BPF_SKEL + if (trace->skel->progs.sys_enter) trace__init_syscalls_bpf_prog_array_maps(trace); +#endif if (trace->ev_qualifier_ids.nr > 0) { err = trace__set_ev_qualifier_filter(trace); @@ -3956,9 +3951,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_apply_filters; - if (trace->dump.map) - bpf_map__fprintf(trace->dump.map, trace->output); - err = evlist__mmap(evlist, trace->opts.mmap_pages); if (err < 0) goto out_error_mmap; @@ -4655,6 +4647,18 @@ static void trace__exit(struct trace *trace) zfree(&trace->perfconfig_events); } +#ifdef HAVE_BPF_SKEL +static int bpf__setup_bpf_output(struct evlist *evlist) +{ + int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/"); + + if (err) + pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n"); + + return err; +} +#endif + int cmd_trace(int argc, const char **argv) { const char *trace_usage[] = { @@ -4686,7 +4690,6 @@ int cmd_trace(int argc, const char **argv) .max_stack = UINT_MAX, .max_events = ULONG_MAX, }; - const char *map_dump_str = NULL; const char *output_name = NULL; const struct option trace_options[] = { OPT_CALLBACK('e', "event", &trace, "event", @@ -4720,9 +4723,6 @@ int cmd_trace(int argc, const char **argv) OPT_CALLBACK(0, "duration", &trace, "float", "show only events with duration > N.M ms", trace__set_duration), -#ifdef HAVE_LIBBPF_SUPPORT - OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"), -#endif OPT_BOOLEAN(0, "sched", &trace.sched, 
"show blocking scheduler events"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_BOOLEAN('T', "time", &trace.full_time, @@ -4849,16 +4849,44 @@ int cmd_trace(int argc, const char **argv) "cgroup monitoring only available in system-wide mode"); } - err = -1; +#ifdef HAVE_BPF_SKEL + trace.skel = augmented_raw_syscalls_bpf__open(); + if (!trace.skel) { + pr_debug("Failed to open augmented syscalls BPF skeleton"); + } else { + /* + * Disable attaching the BPF programs except for sys_enter and + * sys_exit that tail call into this as necessary. + */ + struct bpf_program *prog; - if (map_dump_str) { - trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str); - if (trace.dump.map == NULL) { - pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str); - goto out; + bpf_object__for_each_program(prog, trace.skel->obj) { + if (prog != trace.skel->progs.sys_enter && prog != trace.skel->progs.sys_exit) + bpf_program__set_autoattach(prog, /*autoattach=*/false); + } + + err = augmented_raw_syscalls_bpf__load(trace.skel); + + if (err < 0) { + libbpf_strerror(err, bf, sizeof(bf)); + pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", bf); + } else { + augmented_raw_syscalls_bpf__attach(trace.skel); + trace__add_syscall_newtp(&trace); } } + err = bpf__setup_bpf_output(trace.evlist); + if (err) { + libbpf_strerror(err, bf, sizeof(bf)); + pr_err("ERROR: Setup BPF output event failed: %s\n", bf); + goto out; + } + trace.syscalls.events.bpf_output = evlist__last(trace.evlist); + assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__")); +#endif + err = -1; + if (trace.trace_pgfaults) { trace.opts.sample_address = true; trace.opts.sample_time = true; @@ -4909,7 +4937,7 @@ int cmd_trace(int argc, const char **argv) * buffers that are being copied from kernel to userspace, think 'read' * syscall. 
*/ - if (trace.syscalls.events.augmented) { + if (trace.syscalls.events.bpf_output) { evlist__for_each_entry(trace.evlist, evsel) { bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0; @@ -4918,9 +4946,9 @@ int cmd_trace(int argc, const char **argv) goto init_augmented_syscall_tp; } - if (trace.syscalls.events.augmented->priv == NULL && + if (trace.syscalls.events.bpf_output->priv == NULL && strstr(evsel__name(evsel), "syscalls:sys_enter")) { - struct evsel *augmented = trace.syscalls.events.augmented; + struct evsel *augmented = trace.syscalls.events.bpf_output; if (evsel__init_augmented_syscall_tp(augmented, evsel) || evsel__init_augmented_syscall_tp_args(augmented)) goto out; @@ -5025,5 +5053,8 @@ out_close: fclose(trace.output); out: trace__exit(&trace); +#ifdef HAVE_BPF_SKEL + augmented_raw_syscalls_bpf__destroy(trace.skel); +#endif return err; } diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c deleted file mode 100644 index 9a03189d33d3..000000000000 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ /dev/null @@ -1,417 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null - * - * This exactly matches what is marshalled into the raw_syscall:sys_enter - * payload expected by the 'perf trace' beautifiers. - * - * For now it just uses the existing tracepoint augmentation code in 'perf - * trace', in the next csets we'll hook up these with the sys_enter/sys_exit - * code that will combine entry/exit in a strace like way. 
- */ - -#include -#include -#include - -// FIXME: These should come from system headers -typedef char bool; -typedef int pid_t; -typedef long long int __s64; -typedef __s64 time64_t; - -struct timespec64 { - time64_t tv_sec; - long int tv_nsec; -}; - -/* bpf-output associated map */ -struct __augmented_syscalls__ { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __type(key, int); - __type(value, __u32); - __uint(max_entries, __NR_CPUS__); -} __augmented_syscalls__ SEC(".maps"); - -/* - * What to augment at entry? - * - * Pointer arg payloads (filenames, etc) passed from userspace to the kernel - */ -struct syscalls_sys_enter { - __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __type(key, __u32); - __type(value, __u32); - __uint(max_entries, 512); -} syscalls_sys_enter SEC(".maps"); - -/* - * What to augment at exit? - * - * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. - */ -struct syscalls_sys_exit { - __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __type(key, __u32); - __type(value, __u32); - __uint(max_entries, 512); -} syscalls_sys_exit SEC(".maps"); - -struct syscall_enter_args { - unsigned long long common_tp_fields; - long syscall_nr; - unsigned long args[6]; -}; - -struct syscall_exit_args { - unsigned long long common_tp_fields; - long syscall_nr; - long ret; -}; - -struct augmented_arg { - unsigned int size; - int err; - char value[PATH_MAX]; -}; - -struct pids_filtered { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, pid_t); - __type(value, bool); - __uint(max_entries, 64); -} pids_filtered SEC(".maps"); - -/* - * Desired design of maximum size and alignment (see RFC2553) - */ -#define SS_MAXSIZE 128 /* Implementation specific max size */ - -typedef unsigned short sa_family_t; - -/* - * FIXME: Should come from system headers - * - * The definition uses anonymous union and struct in order to control the - * default alignment. 
- */ -struct sockaddr_storage { - union { - struct { - sa_family_t ss_family; /* address family */ - /* Following field(s) are implementation specific */ - char __data[SS_MAXSIZE - sizeof(unsigned short)]; - /* space to achieve desired size, */ - /* _SS_MAXSIZE value minus size of ss_family */ - }; - void *__align; /* implementation specific desired alignment */ - }; -}; - -struct augmented_args_payload { - struct syscall_enter_args args; - union { - struct { - struct augmented_arg arg, arg2; - }; - struct sockaddr_storage saddr; - char __data[sizeof(struct augmented_arg)]; - }; -}; - -// We need more tmp space than the BPF stack can give us -struct augmented_args_tmp { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, int); - __type(value, struct augmented_args_payload); - __uint(max_entries, 1); -} augmented_args_tmp SEC(".maps"); - -static inline struct augmented_args_payload *augmented_args_payload(void) -{ - int key = 0; - return bpf_map_lookup_elem(&augmented_args_tmp, &key); -} - -static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) -{ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); -} - -static inline -unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) -{ - unsigned int augmented_len = sizeof(*augmented_arg); - int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); - - augmented_arg->size = augmented_arg->err = 0; - /* - * probe_read_str may return < 0, e.g. 
-EFAULT - * So we leave that in the augmented_arg->size that userspace will - */ - if (string_len > 0) { - augmented_len -= sizeof(augmented_arg->value) - string_len; - augmented_len &= sizeof(augmented_arg->value) - 1; - augmented_arg->size = string_len; - } else { - /* - * So that username notice the error while still being able - * to skip this augmented arg record - */ - augmented_arg->err = string_len; - augmented_len = offsetof(struct augmented_arg, value); - } - - return augmented_len; -} - -SEC("!raw_syscalls:unaugmented") -int syscall_unaugmented(struct syscall_enter_args *args) -{ - return 1; -} - -/* - * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in - * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go - * on from there, reading the first syscall arg as a string, i.e. open's - * filename. - */ -SEC("!syscalls:sys_enter_connect") -int sys_enter_connect(struct syscall_enter_args *args) -{ - struct augmented_args_payload *augmented_args = augmented_args_payload(); - const void *sockaddr_arg = (const void *)args->args[1]; - unsigned int socklen = args->args[2]; - unsigned int len = sizeof(augmented_args->args); - - if (augmented_args == NULL) - return 1; /* Failure: don't filter */ - - if (socklen > sizeof(augmented_args->saddr)) - socklen = sizeof(augmented_args->saddr); - - bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); - - return augmented__output(args, augmented_args, len + socklen); -} - -SEC("!syscalls:sys_enter_sendto") -int sys_enter_sendto(struct syscall_enter_args *args) -{ - struct augmented_args_payload *augmented_args = augmented_args_payload(); - const void *sockaddr_arg = (const void *)args->args[4]; - unsigned int socklen = args->args[5]; - unsigned int len = sizeof(augmented_args->args); - - if (augmented_args == NULL) - return 1; /* Failure: don't filter */ - - if (socklen > sizeof(augmented_args->saddr)) - socklen = sizeof(augmented_args->saddr); - - 
bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); - - return augmented__output(args, augmented_args, len + socklen); -} - -SEC("!syscalls:sys_enter_open") -int sys_enter_open(struct syscall_enter_args *args) -{ - struct augmented_args_payload *augmented_args = augmented_args_payload(); - const void *filename_arg = (const void *)args->args[0]; - unsigned int len = sizeof(augmented_args->args); - - if (augmented_args == NULL) - return 1; /* Failure: don't filter */ - - len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); - - return augmented__output(args, augmented_args, len); -} - -SEC("!syscalls:sys_enter_openat") -int sys_enter_openat(struct syscall_enter_args *args) -{ - struct augmented_args_payload *augmented_args = augmented_args_payload(); - const void *filename_arg = (const void *)args->args[1]; - unsigned int len = sizeof(augmented_args->args); - - if (augmented_args == NULL) - return 1; /* Failure: don't filter */ - - len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); - - return augmented__output(args, augmented_args, len); -} - -SEC("!syscalls:sys_enter_rename") -int sys_enter_rename(struct syscall_enter_args *args) -{ - struct augmented_args_payload *augmented_args = augmented_args_payload(); - const void *oldpath_arg = (const void *)args->args[0], - *newpath_arg = (const void *)args->args[1]; - unsigned int len = sizeof(augmented_args->args), oldpath_len; - - if (augmented_args == NULL) - return 1; /* Failure: don't filter */ - - oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); - len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); - - return augmented__output(args, augmented_args, len); -} - -SEC("!syscalls:sys_enter_renameat") -int sys_enter_renameat(struct syscall_enter_args *args) -{ - struct 
augmented_args_payload *augmented_args = augmented_args_payload(); - const void *oldpath_arg = (const void *)args->args[1], - *newpath_arg = (const void *)args->args[3]; - unsigned int len = sizeof(augmented_args->args), oldpath_len; - - if (augmented_args == NULL) - return 1; /* Failure: don't filter */ - - oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); - len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); - - return augmented__output(args, augmented_args, len); -} - -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ - -// we need just the start, get the size to then copy it -struct perf_event_attr_size { - __u32 type; - /* - * Size of the attr structure, for fwd/bwd compat. - */ - __u32 size; -}; - -SEC("!syscalls:sys_enter_perf_event_open") -int sys_enter_perf_event_open(struct syscall_enter_args *args) -{ - struct augmented_args_payload *augmented_args = augmented_args_payload(); - const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; - unsigned int len = sizeof(augmented_args->args); - - if (augmented_args == NULL) - goto failure; - - if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) - goto failure; - - attr_read = (const struct perf_event_attr_size *)augmented_args->__data; - - __u32 size = attr_read->size; - - if (!size) - size = PERF_ATTR_SIZE_VER0; - - if (size > sizeof(augmented_args->__data)) - goto failure; - - // Now that we read attr->size and tested it against the size limits, read it completely - if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) - goto failure; - - return augmented__output(args, augmented_args, len + size); -failure: - return 1; /* Failure: don't filter */ -} - -SEC("!syscalls:sys_enter_clock_nanosleep") -int sys_enter_clock_nanosleep(struct syscall_enter_args *args) -{ - struct 
augmented_args_payload *augmented_args = augmented_args_payload(); - const void *rqtp_arg = (const void *)args->args[2]; - unsigned int len = sizeof(augmented_args->args); - __u32 size = sizeof(struct timespec64); - - if (augmented_args == NULL) - goto failure; - - if (size > sizeof(augmented_args->__data)) - goto failure; - - bpf_probe_read(&augmented_args->__data, size, rqtp_arg); - - return augmented__output(args, augmented_args, len + size); -failure: - return 1; /* Failure: don't filter */ -} - -static pid_t getpid(void) -{ - return bpf_get_current_pid_tgid(); -} - -static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) -{ - return bpf_map_lookup_elem(pids, &pid) != NULL; -} - -SEC("raw_syscalls:sys_enter") -int sys_enter(struct syscall_enter_args *args) -{ - struct augmented_args_payload *augmented_args; - /* - * We start len, the amount of data that will be in the perf ring - * buffer, if this is not filtered out by one of pid_filter__has(), - * syscall->enabled, etc, with the non-augmented raw syscall payload, - * i.e. sizeof(augmented_args->args). - * - * We'll add to this as we add augmented syscalls right after that - * initial, non-augmented raw_syscalls:sys_enter payload. - */ - unsigned int len = sizeof(augmented_args->args); - - if (pid_filter__has(&pids_filtered, getpid())) - return 0; - - augmented_args = augmented_args_payload(); - if (augmented_args == NULL) - return 1; - - bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args); - - /* - * Jump to syscall specific augmenter, even if the default one, - * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unaugmented tracepoint payload. 
- */ - bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); - - // If not found on the PROG_ARRAY syscalls map, then we're filtering it: - return 0; -} - -SEC("raw_syscalls:sys_exit") -int sys_exit(struct syscall_exit_args *args) -{ - struct syscall_exit_args exit_args; - - if (pid_filter__has(&pids_filtered, getpid())) - return 0; - - bpf_probe_read(&exit_args, sizeof(exit_args), args); - /* - * Jump to syscall specific return augmenter, even if the default one, - * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unaugmented tracepoint payload. - */ - bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); - /* - * If not found on the PROG_ARRAY syscalls map, then we're filtering it: - */ - return 0; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c new file mode 100644 index 000000000000..70478b9460ee --- /dev/null +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null + * + * This exactly matches what is marshalled into the raw_syscall:sys_enter + * payload expected by the 'perf trace' beautifiers. + * + * For now it just uses the existing tracepoint augmentation code in 'perf + * trace', in the next csets we'll hook up these with the sys_enter/sys_exit + * code that will combine entry/exit in a strace like way. 
+ */ + +#include +#include +#include + +#define MAX_CPUS 4096 + +// FIXME: These should come from system headers +typedef char bool; +typedef int pid_t; +typedef long long int __s64; +typedef __s64 time64_t; + +struct timespec64 { + time64_t tv_sec; + long int tv_nsec; +}; + +/* bpf-output associated map */ +struct __augmented_syscalls__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, MAX_CPUS); +} __augmented_syscalls__ SEC(".maps"); + +/* + * What to augment at entry? + * + * Pointer arg payloads (filenames, etc) passed from userspace to the kernel + */ +struct syscalls_sys_enter { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_enter SEC(".maps"); + +/* + * What to augment at exit? + * + * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. + */ +struct syscalls_sys_exit { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_exit SEC(".maps"); + +struct syscall_enter_args { + unsigned long long common_tp_fields; + long syscall_nr; + unsigned long args[6]; +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +struct augmented_arg { + unsigned int size; + int err; + char value[PATH_MAX]; +}; + +struct pids_filtered { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, pid_t); + __type(value, bool); + __uint(max_entries, 64); +} pids_filtered SEC(".maps"); + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define SS_MAXSIZE 128 /* Implementation specific max size */ + +typedef unsigned short sa_family_t; + +/* + * FIXME: Should come from system headers + * + * The definition uses anonymous union and struct in order to control the + * default alignment. 
+ */ +struct sockaddr_storage { + union { + struct { + sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ + }; + void *__align; /* implementation specific desired alignment */ + }; +}; + +struct augmented_args_payload { + struct syscall_enter_args args; + union { + struct { + struct augmented_arg arg, arg2; + }; + struct sockaddr_storage saddr; + char __data[sizeof(struct augmented_arg)]; + }; +}; + +// We need more tmp space than the BPF stack can give us +struct augmented_args_tmp { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct augmented_args_payload); + __uint(max_entries, 1); +} augmented_args_tmp SEC(".maps"); + +static inline struct augmented_args_payload *augmented_args_payload(void) +{ + int key = 0; + return bpf_map_lookup_elem(&augmented_args_tmp, &key); +} + +static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) +{ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); +} + +static inline +unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) +{ + unsigned int augmented_len = sizeof(*augmented_arg); + int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); + + augmented_arg->size = augmented_arg->err = 0; + /* + * probe_read_str may return < 0, e.g. 
-EFAULT + * So we leave that in augmented_arg->err for userspace to notice + */ + if (string_len > 0) { + augmented_len -= sizeof(augmented_arg->value) - string_len; + augmented_len &= sizeof(augmented_arg->value) - 1; + augmented_arg->size = string_len; + } else { + /* + * So that userspace notices the error while still being able + * to skip this augmented arg record + */ + augmented_arg->err = string_len; + augmented_len = offsetof(struct augmented_arg, value); + } + + return augmented_len; +} + +SEC("tp/raw_syscalls/sys_enter") +int syscall_unaugmented(struct syscall_enter_args *args) +{ + return 1; +} + +/* + * These will be tail_called from sys_enter(), SEC("tp/raw_syscalls/sys_enter"), so will find in + * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go + * on from there, reading the first syscall arg as a string, i.e. open's + * filename. + */ +SEC("tp/syscalls/sys_enter_connect") +int sys_enter_connect(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *sockaddr_arg = (const void *)args->args[1]; + unsigned int socklen = args->args[2]; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + if (socklen > sizeof(augmented_args->saddr)) + socklen = sizeof(augmented_args->saddr); + + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + + return augmented__output(args, augmented_args, len + socklen); +} + +SEC("tp/syscalls/sys_enter_sendto") +int sys_enter_sendto(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *sockaddr_arg = (const void *)args->args[4]; + unsigned int socklen = args->args[5]; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + if (socklen > sizeof(augmented_args->saddr)) + socklen = sizeof(augmented_args->saddr); + + 
bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + + return augmented__output(args, augmented_args, len + socklen); +} + +SEC("tp/syscalls/sys_enter_open") +int sys_enter_open(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *filename_arg = (const void *)args->args[0]; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); + + return augmented__output(args, augmented_args, len); +} + +SEC("tp/syscalls/sys_enter_openat") +int sys_enter_openat(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *filename_arg = (const void *)args->args[1]; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); + + return augmented__output(args, augmented_args, len); +} + +SEC("tp/syscalls/sys_enter_rename") +int sys_enter_rename(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *oldpath_arg = (const void *)args->args[0], + *newpath_arg = (const void *)args->args[1]; + unsigned int len = sizeof(augmented_args->args), oldpath_len; + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); + len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); + + return augmented__output(args, augmented_args, len); +} + +SEC("tp/syscalls/sys_enter_renameat") +int sys_enter_renameat(struct syscall_enter_args *args) +{ 
+ struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *oldpath_arg = (const void *)args->args[1], + *newpath_arg = (const void *)args->args[3]; + unsigned int len = sizeof(augmented_args->args), oldpath_len; + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); + len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); + + return augmented__output(args, augmented_args, len); +} + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +// we need just the start, get the size to then copy it +struct perf_event_attr_size { + __u32 type; + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; +}; + +SEC("tp/syscalls/sys_enter_perf_event_open") +int sys_enter_perf_event_open(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + goto failure; + + if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) + goto failure; + + attr_read = (const struct perf_event_attr_size *)augmented_args->__data; + + __u32 size = attr_read->size; + + if (!size) + size = PERF_ATTR_SIZE_VER0; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + // Now that we read attr->size and tested it against the size limits, read it completely + if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) + goto failure; + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +SEC("tp/syscalls/sys_enter_clock_nanosleep") +int sys_enter_clock_nanosleep(struct syscall_enter_args *args) +{ + 
struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *rqtp_arg = (const void *)args->args[2]; + unsigned int len = sizeof(augmented_args->args); + __u32 size = sizeof(struct timespec64); + + if (augmented_args == NULL) + goto failure; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + bpf_probe_read(&augmented_args->__data, size, rqtp_arg); + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +static pid_t getpid(void) +{ + return bpf_get_current_pid_tgid(); +} + +static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) +{ + return bpf_map_lookup_elem(pids, &pid) != NULL; +} + +SEC("tp/raw_syscalls/sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args; + /* + * We start len, the amount of data that will be in the perf ring + * buffer, if this is not filtered out by one of pid_filter__has(), + * syscall->enabled, etc, with the non-augmented raw syscall payload, + * i.e. sizeof(augmented_args->args). + * + * We'll add to this as we add augmented syscalls right after that + * initial, non-augmented raw_syscalls:sys_enter payload. + */ + + if (pid_filter__has(&pids_filtered, getpid())) + return 0; + + augmented_args = augmented_args_payload(); + if (augmented_args == NULL) + return 1; + + bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args); + + /* + * Jump to syscall specific augmenter, even if the default one, + * "!raw_syscalls:unaugmented" that will just return 1 to return the + * unaugmented tracepoint payload. 
+ */ + bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); + + // If not found on the PROG_ARRAY syscalls map, then we're filtering it: + return 0; +} + +SEC("tp/raw_syscalls/sys_exit") +int sys_exit(struct syscall_exit_args *args) +{ + struct syscall_exit_args exit_args; + + if (pid_filter__has(&pids_filtered, getpid())) + return 0; + + bpf_probe_read(&exit_args, sizeof(exit_args), args); + /* + * Jump to syscall specific return augmenter, even if the default one, + * "!raw_syscalls:unaugmented" that will just return 1 to return the + * unaugmented tracepoint payload. + */ + bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); + /* + * If not found on the PROG_ARRAY syscalls map, then we're filtering it: + */ + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 5056c99e8d97e1129ff29826971eefbe345b6837 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Aug 2023 11:48:52 -0700 Subject: perf bpf examples: With no BPF events remove examples The examples were used to give demonstrations of BPF events but such functionality is now subsumed by using --filter with 'perf record' or the direct use of BPF skeletons. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Anshuman Khandual Cc: Athira Rajeev Cc: Brendan Gregg Cc: Carsten Haitzler Cc: Eduard Zingerman Cc: Fangrui Song Cc: He Kuang Cc: Ingo Molnar Cc: James Clark Cc: Kan Liang Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Naveen N. 
Rao Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Tiezhu Yang Cc: Tom Rix Cc: Wang Nan Cc: Wang ShaoBo Cc: Yang Jihong Cc: Yonghong Song Cc: YueHaibing Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230810184853.2860737-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 5 --- tools/perf/examples/bpf/5sec.c | 53 ------------------------------ tools/perf/examples/bpf/empty.c | 12 ------- tools/perf/examples/bpf/hello.c | 27 --------------- tools/perf/examples/bpf/sys_enter_openat.c | 33 ------------------- 5 files changed, 130 deletions(-) delete mode 100644 tools/perf/examples/bpf/5sec.c delete mode 100644 tools/perf/examples/bpf/empty.c delete mode 100644 tools/perf/examples/bpf/hello.c delete mode 100644 tools/perf/examples/bpf/sys_enter_openat.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 40663c69b25c..c90d55786a02 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -958,11 +958,6 @@ ifndef NO_JVMTI endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' -ifndef NO_LIBBPF - $(call QUIET_INSTALL, bpf-examples) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ - $(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' -endif $(call QUIET_INSTALL, perf-archive) \ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(call QUIET_INSTALL, perf-iostat) \ diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c deleted file mode 100644 index 3bd7fc17631f..000000000000 --- a/tools/perf/examples/bpf/5sec.c +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - Description: - - . Disable strace like syscall tracing (--no-syscalls), or try tracing - just some (-e *sleep). - - . 
Attach a filter function to a kernel function, returning when it should - be considered, i.e. appear on the output. - - . Run it system wide, so that any sleep of >= 5 seconds and < than 6 - seconds gets caught. - - . Ask for callgraphs using DWARF info, so that userspace can be unwound - - . While this is running, run something like "sleep 5s". - - . If we decide to add tv_nsec as well, then it becomes: - - int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec) - - I.e. add where it comes from (rqtp->tv_nsec) and where it will be - accessible in the function body (nsec) - - # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/ - 0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5 - hrtimer_nanosleep ([kernel.kallsyms]) - __x64_sys_nanosleep ([kernel.kallsyms]) - do_syscall_64 ([kernel.kallsyms]) - entry_SYSCALL_64 ([kernel.kallsyms]) - __GI___nanosleep (/usr/lib64/libc-2.26.so) - rpl_nanosleep (/usr/bin/sleep) - xnanosleep (/usr/bin/sleep) - main (/usr/bin/sleep) - __libc_start_main (/usr/lib64/libc-2.26.so) - _start (/usr/bin/sleep) - ^C# - - Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo -*/ - -#include -#include - -#define NSEC_PER_SEC 1000000000L - -SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp") -int hrtimer_nanosleep(void *ctx, int err, long long sec) -{ - return sec / NSEC_PER_SEC == 5ULL; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c deleted file mode 100644 index 3e296c0c53d7..000000000000 --- a/tools/perf/examples/bpf/empty.c +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -struct syscall_enter_args; - -SEC("raw_syscalls:sys_enter") -int sys_enter(struct syscall_enter_args *args) -{ - return 0; -} -char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c deleted file mode 100644 index 
e9080b0df158..000000000000 --- a/tools/perf/examples/bpf/hello.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -struct __bpf_stdout__ { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __type(key, int); - __type(value, __u32); - __uint(max_entries, __NR_CPUS__); -} __bpf_stdout__ SEC(".maps"); - -#define puts(from) \ - ({ const int __len = sizeof(from); \ - char __from[sizeof(from)] = from; \ - bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ - &__from, __len & (sizeof(from) - 1)); }) - -struct syscall_enter_args; - -SEC("raw_syscalls:sys_enter") -int sys_enter(struct syscall_enter_args *args) -{ - puts("Hello, world\n"); - return 0; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c deleted file mode 100644 index c4481c390d23..000000000000 --- a/tools/perf/examples/bpf/sys_enter_openat.c +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Hook into 'openat' syscall entry tracepoint - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/sys_enter_openat.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. - * - * The syscall_enter_openat_args can be used to get the syscall fields - * and use them for filtering calls, i.e. use in expressions for - * the return value. 
- */ - -#include - -struct syscall_enter_openat_args { - unsigned long long unused; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -int syscall_enter(openat)(struct syscall_enter_openat_args *args) -{ - return 1; -} - -license(GPL); -- cgit v1.2.3-70-g09d2 From cd2cece61ac5f900c43df366c9a64ddb62173707 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 10 Aug 2023 11:48:53 -0700 Subject: perf trace: Tidy comments related to BPF + syscall augmentation Now tools/perf/examples/bpf/augmented_syscalls.c is tools/perf/util/bpf_skel/augmented_syscalls.bpf.c and not enabled as a BPF event, tidy the comments to reflect this. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Anshuman Khandual Cc: Athira Rajeev Cc: Brendan Gregg Cc: Carsten Haitzler Cc: Eduard Zingerman Cc: Fangrui Song Cc: He Kuang Cc: Ingo Molnar Cc: James Clark Cc: Kan Liang Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Naveen N. 
Rao Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Tiezhu Yang Cc: Tom Rix Cc: Wang Nan Cc: Wang ShaoBo Cc: Yang Jihong Cc: Yonghong Song Cc: YueHaibing Cc: bpf@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230810184853.2860737-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/beauty.h | 15 +++++++-------- tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c | 8 -------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 3d12bf0f6d07..788e8f6bd90e 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -67,15 +67,14 @@ extern struct strarray strarray__socket_level; /** * augmented_arg: extra payload for syscall pointer arguments - * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts, - * then its the arguments contents, so that we can show more than just a + * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts, then + * its the arguments contents, so that we can show more than just a * pointer. This will be done initially with eBPF, the start of that is at the - * tools/perf/examples/bpf/augmented_syscalls.c example for the openat, but - * will eventually be done automagically caching the running kernel tracefs - * events data into an eBPF C script, that then gets compiled and its .o file - * cached for subsequent use. For char pointers like the ones for 'open' like - * syscalls its easy, for the rest we should use DWARF or better, BTF, much - * more compact. + * tools/perf/util/bpf_skel/augmented_syscalls.bpf.c that will eventually be + * done automagically caching the running kernel tracefs events data into an + * eBPF C script, that then gets compiled and its .o file cached for subsequent + * use. 
For char pointers like the ones for 'open' like syscalls its easy, for + * the rest we should use DWARF or better, BTF, much more compact. * * @size: 8 if all we need is an integer, otherwise all of the augmented arg. * @int_arg: will be used for integer like pointer contents, like 'accept's 'upeer_addrlen' diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 70478b9460ee..0586c4118656 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -2,16 +2,8 @@ /* * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null - * * This exactly matches what is marshalled into the raw_syscall:sys_enter * payload expected by the 'perf trace' beautifiers. - * - * For now it just uses the existing tracepoint augmentation code in 'perf - * trace', in the next csets we'll hook up these with the sys_enter/sys_exit - * code that will combine entry/exit in a strace like way. */ #include -- cgit v1.2.3-70-g09d2 From dc7f01f1bceca38839992b3371e0be8a3c9d5acf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Aug 2023 19:58:21 -0700 Subject: perf bpf-filter: Fix sample flag check with || For logical OR operator, the actual sample_flags are in the 'groups' list so it needs to check entries in the list instead. Otherwise it would show the following error message. $ sudo perf record -a -e cycles:p --filter 'period > 100 || weight > 0' sleep 1 Error: cycles:p event does not have sample flags 0 failed to set filter "BPF" on event cycles:p with 2 (No such file or directory) Actually it should warn on 'weight' is used without WEIGHT flag. 
Error: cycles:p event does not have PERF_SAMPLE_WEIGHT Hint: please add -W option to perf record failed to set filter "BPF" on event cycles:p with 2 (No such file or directory) Fixes: 4310551b76e0d676 ("perf bpf filter: Show warning for missing sample flags") Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230811025822.3859771-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-filter.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index 47f01df658d9..b51544996046 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -62,6 +62,16 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * if (evsel->core.attr.sample_type & expr->sample_flags) return 0; + if (expr->op == PBF_OP_GROUP_BEGIN) { + struct perf_bpf_filter_expr *group; + + list_for_each_entry(group, &expr->groups, list) { + if (check_sample_flags(evsel, group) < 0) + return -1; + } + return 0; + } + info = get_sample_info(expr->sample_flags); if (info == NULL) { pr_err("Error: %s event does not have sample flags %lx\n", -- cgit v1.2.3-70-g09d2 From 9575ecdd198a50e95ed2319471f7518465b1cd94 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Aug 2023 19:58:22 -0700 Subject: perf test: Add perf record sample filtering test $ sudo ./perf test 'sample filter' -v 94: perf record sample filtering (by BPF) tests : --- start --- test child forked, pid 3817527 Checking BPF-filter privilege Basic bpf-filter test Basic bpf-filter test [Success] Failing bpf-filter test Error: task-clock event does not have PERF_SAMPLE_CPU Failing bpf-filter test [Success] Group bpf-filter test Error: task-clock event does not have PERF_SAMPLE_CPU Error: task-clock event does not have PERF_SAMPLE_CODE_PAGE_SIZE Group bpf-filter test 
[Success] test child finished with 0 ---- end ---- perf record sample filtering (by BPF) tests: Ok Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230811025822.3859771-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record_bpf_filter.sh | 128 ++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100755 tools/perf/tests/shell/record_bpf_filter.sh diff --git a/tools/perf/tests/shell/record_bpf_filter.sh b/tools/perf/tests/shell/record_bpf_filter.sh new file mode 100755 index 000000000000..e76ea861b92c --- /dev/null +++ b/tools/perf/tests/shell/record_bpf_filter.sh @@ -0,0 +1,128 @@ +#!/bin/sh +# perf record sample filtering (by BPF) tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +err=0 +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) + +cleanup() { + rm -f "${perfdata}" + rm -f "${perfdata}".old + trap - EXIT TERM INT +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +test_bpf_filter_priv() { + echo "Checking BPF-filter privilege" + + if [ "$(id -u)" != 0 ] + then + echo "bpf-filter test [Skipped permission]" + err=2 + return + fi + if ! perf record -e task-clock --filter 'period > 1' \ + -o /dev/null --quiet true 2>&1 + then + echo "bpf-filter test [Skipped missing BPF support]" + err=2 + return + fi +} + +test_bpf_filter_basic() { + echo "Basic bpf-filter test" + + if ! 
perf record -e task-clock -c 10000 --filter 'ip < 0xffffffff00000000' \ + -o "${perfdata}" true 2> /dev/null + then + echo "Basic bpf-filter test [Failed record]" + err=1 + return + fi + if perf script -i "${perfdata}" -F ip | grep 'ffffffff[0-9a-f]*' + then + echo "Basic bpf-filter test [Failed invalid output]" + err=1 + return + fi + echo "Basic bpf-filter test [Success]" +} + +test_bpf_filter_fail() { + echo "Failing bpf-filter test" + + # 'cpu' requires PERF_SAMPLE_CPU flag + if ! perf record -e task-clock --filter 'cpu > 0' \ + -o /dev/null true 2>&1 | grep PERF_SAMPLE_CPU + then + echo "Failing bpf-filter test [Failed forbidden CPU]" + err=1 + return + fi + + if ! perf record --sample-cpu -e task-clock --filter 'cpu > 0' \ + -o /dev/null true 2>/dev/null + then + echo "Failing bpf-filter test [Failed should succeed]" + err=1 + return + fi + + echo "Failing bpf-filter test [Success]" +} + +test_bpf_filter_group() { + echo "Group bpf-filter test" + + if ! perf record -e task-clock --filter 'period > 1000 || ip > 0' \ + -o /dev/null true 2>/dev/null + then + echo "Group bpf-filter test [Failed should succeed]" + err=1 + return + fi + + if ! perf record -e task-clock --filter 'cpu > 0 || ip > 0' \ + -o /dev/null true 2>&1 | grep PERF_SAMPLE_CPU + then + echo "Group bpf-filter test [Failed forbidden CPU]" + err=1 + return + fi + + if ! 
perf record -e task-clock --filter 'period > 0 || code_pgsz > 4096' \ + -o /dev/null true 2>&1 | grep PERF_SAMPLE_CODE_PAGE_SIZE + then + echo "Group bpf-filter test [Failed forbidden CODE_PAGE_SIZE]" + err=1 + return + fi + + echo "Group bpf-filter test [Success]" +} + + +test_bpf_filter_priv + +if [ $err = 0 ]; then + test_bpf_filter_basic +fi + +if [ $err = 0 ]; then + test_bpf_filter_fail +fi + +if [ $err = 0 ]; then + test_bpf_filter_group +fi + +cleanup +exit $err -- cgit v1.2.3-70-g09d2 From d095ad45e2d808a5c8c047b5c8d5e0fad7fec4e4 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 15 Aug 2023 22:10:09 +0000 Subject: perf evsel: Remove duplicate check for `field` in evsel__intval() The `field` parameter in evsel__intval() is checked repeatedly, fix it. No functional change. Signed-off-by: Yang Jihong Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Link: https://lore.kernel.org/r/20230815221009.3641751-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e41bc4d9925f..0c50c443d456 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2827,9 +2827,6 @@ u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *n { struct tep_format_field *field = evsel__field(evsel, name); - if (!field) - return 0; - return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; } #endif -- cgit v1.2.3-70-g09d2 From 708a3e8b80a5364fc3dd4991f1e589a6d7a4a8e0 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Tue, 15 Aug 2023 21:17:35 +0800 Subject: perf scripts python: Support syscall name parsing on arm64 In the result of "perf script syscall-counts" on arm64, the syscall events are not resolved currently.
Add "aarch64" to audit uname list to support name parsing. * After the patch: [root@localhost ~]# perf script syscall-counts sleep 1 Press control+C to stop and show the summary syscall events: event count ---------------------------------------- ----------- mmap 6 close 5 mprotect 4 brk 3 newfstatat 3 openat 3 getrandom 1 prlimit64 1 munmap 1 clock_nanosleep 1 set_robust_list 1 set_tid_address 1 exit_group 1 read 1 faccessat 1 Signed-off-by: Wei Li Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Li Bin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230815131735.1237221-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 7384dcb628c4..c37a03fb7ec5 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -54,6 +54,7 @@ try: import audit machine_to_id = { 'x86_64': audit.MACH_86_64, + 'aarch64': audit.MACH_AARCH64, 'alpha' : audit.MACH_ALPHA, 'ia64' : audit.MACH_IA64, 'ppc' : audit.MACH_PPC, -- cgit v1.2.3-70-g09d2 From 41a37430f66560c4b9a9d68a977c8dab65b97ff8 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Tue, 15 Aug 2023 21:18:05 +0800 Subject: perf scripts python: Update audit-libs package name for python3 'audit-libs-python' is the package for python2, update it for python3. On Ubuntu and Fedora, the new package is 'python3-audit'. 
Signed-off-by: Wei Li Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Li Bin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230815131805.1237491-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index c37a03fb7ec5..b75d31858e54 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -74,9 +74,9 @@ try: except: if not audit_package_warned: audit_package_warned = True - print("Install the audit-libs-python package to get syscall names.\n" - "For example:\n # apt-get install python-audit (Ubuntu)" - "\n # yum install audit-libs-python (Fedora)" + print("Install the python-audit package to get syscall names.\n" + "For example:\n # apt-get install python3-audit (Ubuntu)" + "\n # yum install python3-audit (Fedora)" "\n etc.\n") def syscall_name(id): -- cgit v1.2.3-70-g09d2 From f178a76b054fd046d212c3c67745146ff191a443 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 31 Jul 2023 12:18:55 +0300 Subject: perf dlfilter: Add a test for resolve_address() Extend the "dlfilter C API" test to test perf_dlfilter_fns.resolve_address(). The test currently fails, but passes after a subsequent patch. 
Reviewed-by: Ian Rogers Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230731091857.10681-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/dlfilters/dlfilter-test-api-v0.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index b1f51efd67d6..72f263d49121 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -254,6 +254,30 @@ static int check_addr_al(void *ctx) return 0; } +static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample) +{ + struct perf_dlfilter_al address_al; + const struct perf_dlfilter_al *al; + + al = perf_dlfilter_fns.resolve_ip(ctx); + if (!al) + return test_fail("resolve_ip() failed"); + + address_al.size = sizeof(address_al); + if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al)) + return test_fail("resolve_address() failed"); + + CHECK(address_al.sym && al->sym); + CHECK(!strcmp(address_al.sym, al->sym)); + CHECK(address_al.addr == al->addr); + CHECK(address_al.sym_start == al->sym_start); + CHECK(address_al.sym_end == al->sym_end); + CHECK(address_al.dso && al->dso); + CHECK(!strcmp(address_al.dso, al->dso)); + + return 0; +} + static int check_attr(void *ctx) { struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx); @@ -290,7 +314,7 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) return -1; if (early) -- cgit v1.2.3-70-g09d2 From 42c6dd9d23019ff339d0aca80a444eb71087050e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2023 12:18:56 +0300 Subject: perf dlfilter: Initialize addr_location before passing it to 
thread__find_symbol_fb() As thread__find_symbol_fb() will end up calling thread__find_map() and it in turn will call these on uninitialized memory: maps__zput(al->maps); map__zput(al->map); thread__zput(al->thread); Fixes: 0dd5041c9a0eaf8c ("perf addr_location: Add init/exit/copy functions") Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Aneesh Kumar K.V Cc: Athira Rajeev Cc: Disha Goel Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230731091857.10681-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dlfilter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 46f74b2344db..798a53d7e6c9 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -166,6 +166,7 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf if (!thread) return -1; + addr_location__init(&al); thread__find_symbol_fb(thread, d->sample->cpumode, address, &al); al_to_d_al(&al, &d_al); -- cgit v1.2.3-70-g09d2 From 82b0a10390e5f198a4e23c9cc6a7307d2cf099f3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 31 Jul 2023 12:18:57 +0300 Subject: perf dlfilter: Add al_cleanup() Add perf_dlfilter_fns.al_cleanup() to do addr_location__exit() on data passed via perf_dlfilter_fns.resolve_address(). Add dlfilter-test-api-v2 to the "dlfilter C API" test to test it. Update documentation, clarifying that data returned by APIs should not be dereferenced after filter_event() and filter_event_early() return. 
Fixes: 0dd5041c9a0eaf8c ("perf addr_location: Add init/exit/copy functions") Reviewed-by: Ian Rogers Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230731091857.10681-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-dlfilter.txt | 22 +- tools/perf/Makefile.perf | 2 +- tools/perf/dlfilters/dlfilter-test-api-v2.c | 377 ++++++++++++++++++++++++++++ tools/perf/include/perf/perf_dlfilter.h | 11 +- tools/perf/tests/dlfilter-test.c | 38 ++- tools/perf/util/dlfilter.c | 29 +++ 6 files changed, 464 insertions(+), 15 deletions(-) create mode 100644 tools/perf/dlfilters/dlfilter-test-api-v2.c diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt index fb22e3b31dc5..8887cc20a809 100644 --- a/tools/perf/Documentation/perf-dlfilter.txt +++ b/tools/perf/Documentation/perf-dlfilter.txt @@ -64,6 +64,12 @@ internal filtering. If implemented, 'filter_description' should return a one-line description of the filter, and optionally a longer description. +Do not assume the 'sample' argument is valid (dereferenceable) +after 'filter_event' and 'filter_event_early' return. + +Do not assume data referenced by pointers in struct perf_dlfilter_sample +is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return. + The perf_dlfilter_sample structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -150,7 +156,8 @@ struct perf_dlfilter_fns { const char *(*srcline)(void *ctx, __u32 *line_number); struct perf_event_attr *(*attr)(void *ctx); __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); - void *(*reserved[120])(void *); + void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al); + void *(*reserved[119])(void *); }; ---- @@ -161,7 +168,8 @@ struct perf_dlfilter_fns { 'args' returns arguments from --dlarg options. 'resolve_address' provides information about 'address'. al->size must be set -before calling. 
Returns 0 on success, -1 otherwise. +before calling. Returns 0 on success, -1 otherwise. Call al_cleanup() (if present, +see below) when 'al' data is no longer needed. 'insn' returns instruction bytes and length. @@ -171,6 +179,12 @@ before calling. Returns 0 on success, -1 otherwise. 'object_code' reads object code and returns the number of bytes read. +'al_cleanup' must be called (if present, so check perf_dlfilter_fns.al_cleanup != NULL) +after resolve_address() to free any associated resources. + +Do not assume pointers obtained via perf_dlfilter_fns are valid (dereferenceable) +after 'filter_event' and 'filter_event_early' return. + The perf_dlfilter_al structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -197,9 +211,13 @@ struct perf_dlfilter_al { /* Below members are only populated by resolve_ip() */ __u8 filtered; /* true if this sample event will be filtered out */ const char *comm; + void *priv; /* Private data. Do not change */ }; ---- +Do not assume data referenced by pointers in struct perf_dlfilter_al +is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return. 
+ perf_dlfilter_sample flags ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c90d55786a02..a5dd1ba0fb5f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -377,7 +377,7 @@ ifndef NO_JVMTI PROGRAMS += $(OUTPUT)$(LIBJVMTI) endif -DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so +DLFILTERS := dlfilter-test-api-v0.so dlfilter-test-api-v2.so dlfilter-show-cycles.so DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS)) # what 'all' will build and 'install' will install, in perfexecdir diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c new file mode 100644 index 000000000000..38e593d92920 --- /dev/null +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test v2 API for perf --dlfilter shared object + * Copyright (c) 2023, Intel Corporation. + */ +#include +#include +#include +#include + +/* + * Copy v2 API instead of including current API + */ +#include +#include + +/* + * The following macro can be used to determine if this header defines + * perf_dlfilter_sample machine_pid and vcpu. 
+ */ +#define PERF_DLFILTER_HAS_MACHINE_PID + +/* Definitions for perf_dlfilter_sample flags */ +enum { + PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0, + PERF_DLFILTER_FLAG_CALL = 1ULL << 1, + PERF_DLFILTER_FLAG_RETURN = 1ULL << 2, + PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3, + PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4, + PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5, + PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6, + PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7, + PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8, + PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9, + PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10, + PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11, + PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12, +}; + +/* + * perf sample event information (as per perf script and ) + */ +struct perf_dlfilter_sample { + __u32 size; /* Size of this structure (for compatibility checking) */ + __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in */ + __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in */ + __u64 ip; + __s32 pid; + __s32 tid; + __u64 time; + __u64 addr; + __u64 id; + __u64 stream_id; + __u64 period; + __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in */ + __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in */ + __u64 insn_cnt; /* For instructions-per-cycle (IPC) */ + __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */ + __s32 cpu; + __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */ + __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in */ + __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in */ + __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in */ + __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in */ + __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in */ + __u8 cpumode; /* Refer CPUMODE_MASK etc in */ + __u8 addr_correlates_sym; /* True => resolve_addr() can be called */ + __u16 misc; /* Refer perf_event_header in */ + __u32 raw_size; /* Refer PERF_SAMPLE_RAW in */ + const void *raw_data; /* Refer PERF_SAMPLE_RAW in */ + __u64 brstack_nr; /* Number of brstack entries */ + const struct 
perf_branch_entry *brstack; /* Refer */ + __u64 raw_callchain_nr; /* Number of raw_callchain entries */ + const __u64 *raw_callchain; /* Refer */ + const char *event; + __s32 machine_pid; + __s32 vcpu; +}; + +/* + * Address location (as per perf script) + */ +struct perf_dlfilter_al { + __u32 size; /* Size of this structure (for compatibility checking) */ + __u32 symoff; + const char *sym; + __u64 addr; /* Mapped address (from dso) */ + __u64 sym_start; + __u64 sym_end; + const char *dso; + __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer */ + __u8 is_64_bit; /* Only valid if dso is not NULL */ + __u8 is_kernel_ip; /* True if in kernel space */ + __u32 buildid_size; + __u8 *buildid; + /* Below members are only populated by resolve_ip() */ + __u8 filtered; /* True if this sample event will be filtered out */ + const char *comm; + void *priv; /* Private data (v2 API) */ +}; + +struct perf_dlfilter_fns { + /* Return information about ip */ + const struct perf_dlfilter_al *(*resolve_ip)(void *ctx); + /* Return information about addr (if addr_correlates_sym) */ + const struct perf_dlfilter_al *(*resolve_addr)(void *ctx); + /* Return arguments from --dlarg option */ + char **(*args)(void *ctx, int *dlargc); + /* + * Return information about address (al->size must be set before + * calling). Returns 0 on success, -1 otherwise. Call al_cleanup() + * when 'al' data is no longer needed. + */ + __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al); + /* Return instruction bytes and length */ + const __u8 *(*insn)(void *ctx, __u32 *length); + /* Return source file name and line number */ + const char *(*srcline)(void *ctx, __u32 *line_number); + /* Return perf_event_attr, refer */ + struct perf_event_attr *(*attr)(void *ctx); + /* Read object code, return numbers of bytes read */ + __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); + /* + * If present (i.e. 
must check al_cleanup != NULL), call after + * resolve_address() to free any associated resources. (v2 API) + */ + void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al); + /* Reserved */ + void *(*reserved[119])(void *); +}; + +struct perf_dlfilter_fns perf_dlfilter_fns; + +static int verbose; + +#define pr_debug(fmt, ...) do { \ + if (verbose > 0) \ + fprintf(stderr, fmt, ##__VA_ARGS__); \ + } while (0) + +static int test_fail(const char *msg) +{ + pr_debug("%s\n", msg); + return -1; +} + +#define CHECK(x) do { \ + if (!(x)) \ + return test_fail("Check '" #x "' failed\n"); \ + } while (0) + +struct filter_data { + __u64 ip; + __u64 addr; + int do_early; + int early_filter_cnt; + int filter_cnt; +}; + +static struct filter_data *filt_dat; + +int start(void **data, void *ctx) +{ + int dlargc; + char **dlargv; + struct filter_data *d; + static bool called; + + verbose = 1; + + CHECK(!filt_dat && !called); + called = true; + + d = calloc(1, sizeof(*d)); + if (!d) + test_fail("Failed to allocate memory"); + filt_dat = d; + *data = d; + + dlargv = perf_dlfilter_fns.args(ctx, &dlargc); + + CHECK(dlargc == 6); + CHECK(!strcmp(dlargv[0], "first")); + verbose = strtol(dlargv[1], NULL, 0); + d->ip = strtoull(dlargv[2], NULL, 0); + d->addr = strtoull(dlargv[3], NULL, 0); + d->do_early = strtol(dlargv[4], NULL, 0); + CHECK(!strcmp(dlargv[5], "last")); + + pr_debug("%s API\n", __func__); + + return 0; +} + +#define CHECK_SAMPLE(x) do { \ + if (sample->x != expected.x) \ + return test_fail("'" #x "' not expected value\n"); \ + } while (0) + +static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample *sample) +{ + struct perf_dlfilter_sample expected = { + .ip = d->ip, + .pid = 12345, + .tid = 12346, + .time = 1234567890, + .addr = d->addr, + .id = 99, + .stream_id = 101, + .period = 543212345, + .cpu = 31, + .cpumode = PERF_RECORD_MISC_USER, + .addr_correlates_sym = 1, + .misc = PERF_RECORD_MISC_USER, + }; + + CHECK(sample->size >= sizeof(struct 
perf_dlfilter_sample)); + + CHECK_SAMPLE(ip); + CHECK_SAMPLE(pid); + CHECK_SAMPLE(tid); + CHECK_SAMPLE(time); + CHECK_SAMPLE(addr); + CHECK_SAMPLE(id); + CHECK_SAMPLE(stream_id); + CHECK_SAMPLE(period); + CHECK_SAMPLE(cpu); + CHECK_SAMPLE(cpumode); + CHECK_SAMPLE(addr_correlates_sym); + CHECK_SAMPLE(misc); + + CHECK(!sample->raw_data); + CHECK_SAMPLE(brstack_nr); + CHECK(!sample->brstack); + CHECK_SAMPLE(raw_callchain_nr); + CHECK(!sample->raw_callchain); + +#define EVENT_NAME "branches:" + CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); + + return 0; +} + +static int check_al(void *ctx) +{ + const struct perf_dlfilter_al *al; + + al = perf_dlfilter_fns.resolve_ip(ctx); + if (!al) + return test_fail("resolve_ip() failed"); + + CHECK(al->sym && !strcmp("foo", al->sym)); + CHECK(!al->symoff); + + return 0; +} + +static int check_addr_al(void *ctx) +{ + const struct perf_dlfilter_al *addr_al; + + addr_al = perf_dlfilter_fns.resolve_addr(ctx); + if (!addr_al) + return test_fail("resolve_addr() failed"); + + CHECK(addr_al->sym && !strcmp("bar", addr_al->sym)); + CHECK(!addr_al->symoff); + + return 0; +} + +static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample) +{ + struct perf_dlfilter_al address_al; + const struct perf_dlfilter_al *al; + + al = perf_dlfilter_fns.resolve_ip(ctx); + if (!al) + return test_fail("resolve_ip() failed"); + + address_al.size = sizeof(address_al); + if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al)) + return test_fail("resolve_address() failed"); + + CHECK(address_al.sym && al->sym); + CHECK(!strcmp(address_al.sym, al->sym)); + CHECK(address_al.addr == al->addr); + CHECK(address_al.sym_start == al->sym_start); + CHECK(address_al.sym_end == al->sym_end); + CHECK(address_al.dso && al->dso); + CHECK(!strcmp(address_al.dso, al->dso)); + + /* al_cleanup() is v2 API so may not be present */ + if (perf_dlfilter_fns.al_cleanup) + perf_dlfilter_fns.al_cleanup(ctx, &address_al); + + return 0; 
+} + +static int check_attr(void *ctx) +{ + struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx); + + CHECK(attr); + CHECK(attr->type == PERF_TYPE_HARDWARE); + CHECK(attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + + return 0; +} + +static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) +{ + struct filter_data *d = data; + + CHECK(data && filt_dat == data); + + if (early) { + CHECK(!d->early_filter_cnt); + d->early_filter_cnt += 1; + } else { + CHECK(!d->filter_cnt); + CHECK(d->early_filter_cnt); + CHECK(d->do_early != 2); + d->filter_cnt += 1; + } + + if (check_sample(data, sample)) + return -1; + + if (check_attr(ctx)) + return -1; + + if (early && !d->do_early) + return 0; + + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + return -1; + + if (early) + return d->do_early == 2; + + return 1; +} + +int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + pr_debug("%s API\n", __func__); + + return do_checks(data, sample, ctx, true); +} + +int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + pr_debug("%s API\n", __func__); + + return do_checks(data, sample, ctx, false); +} + +int stop(void *data, void *ctx) +{ + static bool called; + + pr_debug("%s API\n", __func__); + + CHECK(data && filt_dat == data && !called); + called = true; + + free(data); + filt_dat = NULL; + return 0; +} + +const char *filter_description(const char **long_description) +{ + *long_description = "Filter used by the 'dlfilter C API' perf test"; + return "dlfilter to test v2 C API"; +} diff --git a/tools/perf/include/perf/perf_dlfilter.h b/tools/perf/include/perf/perf_dlfilter.h index a26e2f129f83..16fc4568ac53 100644 --- a/tools/perf/include/perf/perf_dlfilter.h +++ b/tools/perf/include/perf/perf_dlfilter.h @@ -91,6 +91,7 @@ struct perf_dlfilter_al { /* Below members are only populated by resolve_ip() */ __u8 filtered; /* True if this sample 
event will be filtered out */ const char *comm; + void *priv; /* Private data. Do not change */ }; struct perf_dlfilter_fns { @@ -102,7 +103,8 @@ struct perf_dlfilter_fns { char **(*args)(void *ctx, int *dlargc); /* * Return information about address (al->size must be set before - * calling). Returns 0 on success, -1 otherwise. + * calling). Returns 0 on success, -1 otherwise. Call al_cleanup() + * when 'al' data is no longer needed. */ __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al); /* Return instruction bytes and length */ @@ -113,8 +115,13 @@ struct perf_dlfilter_fns { struct perf_event_attr *(*attr)(void *ctx); /* Read object code, return numbers of bytes read */ __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); + /* + * If present (i.e. must check al_cleanup != NULL), call after + * resolve_address() to free any associated resources. + */ + void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al); /* Reserved */ - void *(*reserved[120])(void *); + void *(*reserved[119])(void *); }; /* diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c index 086fd2179e41..da3a9b50b1b1 100644 --- a/tools/perf/tests/dlfilter-test.c +++ b/tools/perf/tests/dlfilter-test.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Test dlfilter C API. A perf.data file is synthesized and then processed - * by perf script with a dlfilter named dlfilter-test-api-v0.so. Also a C file + * by perf script with dlfilters named dlfilter-test-api-v*.so. Also a C file * is compiled to provide a dso to match the synthesized perf.data file. 
*/ @@ -37,6 +37,8 @@ #define MAP_START 0x400000 +#define DLFILTER_TEST_NAME_MAX 128 + struct test_data { struct perf_tool tool; struct machine *machine; @@ -45,6 +47,8 @@ struct test_data { u64 bar; u64 ip; u64 addr; + char name[DLFILTER_TEST_NAME_MAX]; + char desc[DLFILTER_TEST_NAME_MAX]; char perf[PATH_MAX]; char perf_data_file_name[PATH_MAX]; char c_file_name[PATH_MAX]; @@ -215,7 +219,7 @@ static int write_prog(char *file_name) return err ? -1 : 0; } -static int get_dlfilters_path(char *buf, size_t sz) +static int get_dlfilters_path(const char *name, char *buf, size_t sz) { char perf[PATH_MAX]; char path[PATH_MAX]; @@ -224,12 +228,12 @@ static int get_dlfilters_path(char *buf, size_t sz) perf_exe(perf, sizeof(perf)); perf_path = dirname(perf); - snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", perf_path); + snprintf(path, sizeof(path), "%s/dlfilters/%s", perf_path, name); if (access(path, R_OK)) { exec_path = get_argv_exec_path(); if (!exec_path) return -1; - snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", exec_path); + snprintf(path, sizeof(path), "%s/dlfilters/%s", exec_path, name); free(exec_path); if (access(path, R_OK)) return -1; @@ -244,9 +248,9 @@ static int check_filter_desc(struct test_data *td) char *desc = NULL; int ret; - if (get_filter_desc(td->dlfilters, "dlfilter-test-api-v0.so", &desc, &long_desc) && + if (get_filter_desc(td->dlfilters, td->name, &desc, &long_desc) && long_desc && !strcmp(long_desc, "Filter used by the 'dlfilter C API' perf test") && - desc && !strcmp(desc, "dlfilter to test v0 C API")) + desc && !strcmp(desc, td->desc)) ret = 0; else ret = -1; @@ -284,7 +288,7 @@ static int get_ip_addr(struct test_data *td) static int do_run_perf_script(struct test_data *td, int do_early) { return system_cmd("%s script -i %s " - "--dlfilter %s/dlfilter-test-api-v0.so " + "--dlfilter %s/%s " "--dlarg first " "--dlarg %d " "--dlarg %" PRIu64 " " @@ -292,7 +296,7 @@ static int do_run_perf_script(struct 
test_data *td, int do_early) "--dlarg %d " "--dlarg last", td->perf, td->perf_data_file_name, td->dlfilters, - verbose, td->ip, td->addr, do_early); + td->name, verbose, td->ip, td->addr, do_early); } static int run_perf_script(struct test_data *td) @@ -321,7 +325,7 @@ static int test__dlfilter_test(struct test_data *td) u64 id = 99; int err; - if (get_dlfilters_path(td->dlfilters, PATH_MAX)) + if (get_dlfilters_path(td->name, td->dlfilters, PATH_MAX)) return test_result("dlfilters not found", TEST_SKIP); if (check_filter_desc(td)) @@ -399,14 +403,18 @@ static void test_data__free(struct test_data *td) } } -static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +static int test__dlfilter_ver(int ver) { struct test_data td = {.fd = -1}; int pid = getpid(); int err; + pr_debug("\n-- Testing version %d API --\n", ver); + perf_exe(td.perf, sizeof(td.perf)); + snprintf(td.name, sizeof(td.name), "dlfilter-test-api-v%d.so", ver); + snprintf(td.desc, sizeof(td.desc), "dlfilter to test v%d C API", ver); snprintf(td.perf_data_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-perf-data", pid); snprintf(td.c_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog.c", pid); snprintf(td.prog_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog", pid); @@ -416,4 +424,14 @@ static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __ return err; } +static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + int err = test__dlfilter_ver(0); + + if (err) + return err; + /* No test for version 1 */ + return test__dlfilter_ver(2); +} + DEFINE_SUITE("dlfilter C API", dlfilter); diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 798a53d7e6c9..e0f822ebb9b9 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include "debug.h" #include "event.h" @@ -63,6 +65,7 @@ static void al_to_d_al(struct 
addr_location *al, struct perf_dlfilter_al *d_al) d_al->addr = al->addr; d_al->comm = NULL; d_al->filtered = 0; + d_al->priv = NULL; } static struct addr_location *get_al(struct dlfilter *d) @@ -151,6 +154,11 @@ static char **dlfilter__args(void *ctx, int *dlargc) return d->dlargv; } +static bool has_priv(struct perf_dlfilter_al *d_al_p) +{ + return d_al_p->size >= offsetof(struct perf_dlfilter_al, priv) + sizeof(d_al_p->priv); +} + static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p) { struct dlfilter *d = (struct dlfilter *)ctx; @@ -177,9 +185,29 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf memcpy(d_al_p, &d_al, min((size_t)sz, sizeof(d_al))); d_al_p->size = sz; + if (has_priv(d_al_p)) + d_al_p->priv = memdup(&al, sizeof(al)); + return 0; } +static void dlfilter__al_cleanup(void *ctx __maybe_unused, struct perf_dlfilter_al *d_al_p) +{ + struct addr_location *al; + + /* Ensure backward compatibility */ + if (!has_priv(d_al_p) || !d_al_p->priv) + return; + + al = d_al_p->priv; + + d_al_p->priv = NULL; + + addr_location__exit(al); + + free(al); +} + static const __u8 *dlfilter__insn(void *ctx, __u32 *len) { struct dlfilter *d = (struct dlfilter *)ctx; @@ -297,6 +325,7 @@ static const struct perf_dlfilter_fns perf_dlfilter_fns = { .resolve_addr = dlfilter__resolve_addr, .args = dlfilter__args, .resolve_address = dlfilter__resolve_address, + .al_cleanup = dlfilter__al_cleanup, .insn = dlfilter__insn, .srcline = dlfilter__srcline, .attr = dlfilter__attr, -- cgit v1.2.3-70-g09d2 From a4b6452af7f481d9a36037dd7c76c2d394992ad5 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 11 Aug 2023 15:39:18 +0100 Subject: perf cs-etm: Don't duplicate FIELD_GET() linux/bitfield.h can be included as long as linux/kernel.h is included first, so change the order of the includes and drop the duplicate macro. 
Reviewed-by: John Garry Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Ian Rogers Cc: Ingo Molnar Cc: Jing Zhang Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Nick Forrington Cc: Peter Zijlstra Cc: Rob Herring Cc: Sohom Datta Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230811144017.491628-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1419b40dfbe8..9729d006550d 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -6,10 +6,11 @@ * Author: Mathieu Poirier */ +#include +#include #include #include #include -#include #include #include #include @@ -281,17 +282,6 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) return 0; } -/* - * FIELD_GET (linux/bitfield.h) not available outside kernel code, - * and the header contains too many dependencies to just copy over, - * so roll our own based on the original - */ -#define __bf_shf(x) (__builtin_ffsll(x) - 1) -#define FIELD_GET(_mask, _reg) \ - ({ \ - (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ - }) - /* * Get a metadata for a specific cpu from an array. * -- cgit v1.2.3-70-g09d2 From ab3744007d51420dd63d5323acbe7abbb843ba63 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 11 Aug 2023 15:39:21 +0100 Subject: perf vendor events arm64: Update scale units and descriptions of common topdown metrics Metrics will be published here [1] going forwards, but they have slightly different scale units. To allow autogenerated metrics to be added more easily, update the scale units to match. The more detailed descriptions have also been taken and added to the common file. 
[1]: https://gitlab.arm.com/telemetry-solution/telemetry-solution/-/tree/main/data/pmu/cpu/ Reviewed-by: John Garry Signed-off-by: James Clark Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Ingo Molnar Cc: Jing Zhang Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Nick Forrington Cc: Peter Zijlstra Cc: Rob Herring Cc: Sohom Datta Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230811144017.491628-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/arm64/sbsa.json | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/pmu-events/arch/arm64/sbsa.json b/tools/perf/pmu-events/arch/arm64/sbsa.json index f90b338261ac..4eed79a28f6e 100644 --- a/tools/perf/pmu-events/arch/arm64/sbsa.json +++ b/tools/perf/pmu-events/arch/arm64/sbsa.json @@ -1,34 +1,34 @@ [ { - "MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)", - "BriefDescription": "Frontend bound L1 topdown metric", + "MetricExpr": "100 * (stall_slot_frontend / (#slots * cpu_cycles))", + "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the frontend of the processor.", "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "MetricName": "frontend_bound", - "ScaleUnit": "100%" + "ScaleUnit": "1percent of slots" }, { - "MetricExpr": "(1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))", - "BriefDescription": "Bad speculation L1 topdown metric", + "MetricExpr": "100 * ((1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles)))", + "BriefDescription": "This metric is the percentage of total slots that executed operations and didn't retire due to a pipeline flush.\nThis indicates cycles that were utilized 
but inefficiently.", "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "MetricName": "bad_speculation", - "ScaleUnit": "100%" + "ScaleUnit": "1percent of slots" }, { - "MetricExpr": "(op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))", - "BriefDescription": "Retiring L1 topdown metric", + "MetricExpr": "100 * ((op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles)))", + "BriefDescription": "This metric is the percentage of total slots that retired operations, which indicates cycles that were utilized efficiently.", "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "MetricName": "retiring", - "ScaleUnit": "100%" + "ScaleUnit": "1percent of slots" }, { - "MetricExpr": "stall_slot_backend / (#slots * cpu_cycles)", - "BriefDescription": "Backend Bound L1 topdown metric", + "MetricExpr": "100 * (stall_slot_backend / (#slots * cpu_cycles))", + "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the backend of the processor.", "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "MetricName": "backend_bound", - "ScaleUnit": "100%" + "ScaleUnit": "1percent of slots" } ] -- cgit v1.2.3-70-g09d2 From a1ef3aaf6ada374818ebcf978c175e65a4cd60ab Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 18 Jul 2023 16:52:42 +0800 Subject: perf docs: Fix format of unordered lists Fix the format of unordered lists so they can wrap properly. 
Signed-off-by: Changbin Du Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230718085242.3090797-1-changbin.du@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 16 ++++--- tools/perf/Documentation/perf-record.txt | 73 +++++++++++++++++--------------- 2 files changed, 48 insertions(+), 41 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index df4595563801..d780b93fcf87 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -96,8 +96,9 @@ OPTIONS for 'perf ftrace trace' --func-opts:: List of options allowed to set: - call-graph - Display kernel stack trace for function tracer. - irq-info - Display irq context info for function tracer. + + - call-graph - Display kernel stack trace for function tracer. + - irq-info - Display irq context info for function tracer. -G:: --graph-funcs=:: @@ -118,11 +119,12 @@ OPTIONS for 'perf ftrace trace' --graph-opts:: List of options allowed to set: - nosleep-time - Measure on-CPU time only for function_graph tracer. - noirqs - Ignore functions that happen inside interrupt. - verbose - Show process names, PIDs, timestamps, etc. - thresh= - Setup trace duration threshold in microseconds. - depth= - Set max depth for function graph tracer to follow. + + - nosleep-time - Measure on-CPU time only for function_graph tracer. + - noirqs - Ignore functions that happen inside interrupt. + - verbose - Show process names, PIDs, timestamps, etc. + - thresh= - Setup trace duration threshold in microseconds. + - depth= - Set max depth for function graph tracer to follow. 
OPTIONS for 'perf ftrace latency' diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7d362407fb39..d5217be012d7 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -509,9 +509,10 @@ CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. Select AUX area tracing Snapshot Mode. This option is valid only with an AUX area tracing event. Optionally, certain snapshot capturing parameters can be specified in a string that follows this option: - 'e': take one last snapshot on exit; guarantees that there is at least one + + - 'e': take one last snapshot on exit; guarantees that there is at least one snapshot in the output file; - : if the PMU supports this, specify the desired snapshot size. + - : if the PMU supports this, specify the desired snapshot size. In Snapshot Mode trace data is captured only when signal SIGUSR2 is received and on exit if the above 'e' option is given. @@ -550,8 +551,9 @@ providing implementation for Posix AIO API. --affinity=mode:: Set affinity mask of trace reading thread according to the policy defined by 'mode' value: - node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer - cpu - thread affinity mask is set to cpu of the processed mmap buffer + + - node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer + - cpu - thread affinity mask is set to cpu of the processed mmap buffer --mmap-flush=number:: @@ -603,16 +605,17 @@ Record timestamp boundary (time of first/last samples). --switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one based on 'mode' value: - "signal" - when receiving a SIGUSR2 (default value) or - - when reaching the size threshold, size is expected to - be a number with appended unit character - B/K/M/G -