From d4953f7ef1a2e87ef732823af35361404d13fea8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 14 Mar 2020 10:03:56 -0700 Subject: perf parse-events: Fix 3 use after frees found with clang ASAN Reproducible with a clang asan build and then running perf test in particular 'Parse event definition strings'. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200314170356.62914-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + tools/perf/util/parse-events.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 816d930d774e..15ccd193483f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1287,6 +1287,7 @@ void perf_evsel__exit(struct evsel *evsel) perf_thread_map__put(evsel->core.threads); zfree(&evsel->group_name); zfree(&evsel->name); + zfree(&evsel->pmu_name); perf_evsel__object.fini(evsel); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a7dc0b096974..10107747b361 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1449,7 +1449,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats, NULL); if (evsel) { - evsel->pmu_name = name; + evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; return 0; } else { @@ -1497,7 +1497,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->snapshot = info.snapshot; evsel->metric_expr = info.metric_expr; evsel->metric_name = info.metric_name; - evsel->pmu_name = name; + evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; evsel->percore = config_term_percore(&evsel->config_terms); } @@ -1547,7 +1547,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (!parse_events_add_pmu(parse_state, list, pmu->name, head, true, true)) { - pr_debug("%s -> %s/%s/\n", config, + pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; } -- cgit v1.2.3-70-g09d2 From 7eec00a74720d35b6d89505350e5b08ecef376c0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 6 Mar 2020 09:57:58 +0800 Subject: perf symbols: Consolidate symbol fixup issue After copying Arm64's perf archive with object files and perf.data file to x86 laptop, the x86's perf kernel symbol resolution fails. It outputs 'unknown' for all symbols parsing. This issue is root caused by the function elf__needs_adjust_symbols(), x86 perf tool uses one weak version, Arm64 (and powerpc) has rewritten their own version. elf__needs_adjust_symbols() decides if need to parse symbols with the relative offset address; but x86 building uses the weak function which misses to check for the elf type 'ET_DYN', so that it cannot parse symbols in Arm DSOs due to the wrong result from elf__needs_adjust_symbols(). The DSO parsing should not depend on any specific architecture perf building; e.g. x86 perf tool can parse Arm and Arm64 DSOs, vice versa. And confirmed by Naveen N. Rao that powerpc64 kernels are not being built as ET_DYN anymore and change to ET_EXEC. This patch removes the arch specific functions for Arm64 and powerpc and changes elf__needs_adjust_symbols() as a common function. In the common elf__needs_adjust_symbols(), it checks an extra condition 'ET_DYN' for elf header type. With this fixing, the Arm64 DSO can be parsed properly with x86's perf tool. Before: # perf script main 3258 1 branches: 0 [unknown] ([unknown]) => ffff800010c4665c [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c46670 [unknown] ([kernel.kallsyms]) => ffff800010c4eaec [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eaec [unknown] ([kernel.kallsyms]) => ffff800010c4eb00 [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eb08 [unknown] ([kernel.kallsyms]) => ffff800010c4e780 [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4e7a0 [unknown] ([kernel.kallsyms]) => ffff800010c4eeac [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eebc [unknown] ([kernel.kallsyms]) => ffff800010c4ed80 [unknown] ([kernel.kallsyms]) After: # perf script main 3258 1 branches: 0 [unknown] ([unknown]) => ffff800010c4665c coresight_timeout+0x54 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c46670 coresight_timeout+0x68 ([kernel.kallsyms]) => ffff800010c4eaec etm4_enable_hw+0x3cc ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eaec etm4_enable_hw+0x3cc ([kernel.kallsyms]) => ffff800010c4eb00 etm4_enable_hw+0x3e0 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eb08 etm4_enable_hw+0x3e8 ([kernel.kallsyms]) => ffff800010c4e780 etm4_enable_hw+0x60 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4e7a0 etm4_enable_hw+0x80 ([kernel.kallsyms]) => ffff800010c4eeac etm4_enable+0x2d4 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eebc etm4_enable+0x2e4 ([kernel.kallsyms]) => ffff800010c4ed80 etm4_enable+0x1a8 ([kernel.kallsyms]) v3: Changed to check for ET_DYN across all architectures. v2: Fixed Arm64 and powerpc native building. Reported-by: Mike Leach Signed-off-by: Leo Yan Reviewed-by: Naveen N. Rao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Allison Randal Cc: Enrico Weigelt Cc: Greg Kroah-Hartman Cc: Hendrik Brueckner Cc: John Garry Cc: Kate Stewart Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Richter Link: http://lore.kernel.org/lkml/20200306015759.10084-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 - tools/perf/arch/arm64/util/sym-handling.c | 19 ------------------- tools/perf/arch/powerpc/util/Build | 1 - tools/perf/arch/powerpc/util/sym-handling.c | 10 ---------- tools/perf/util/symbol-elf.c | 10 ++++++++-- 5 files changed, 8 insertions(+), 33 deletions(-) delete mode 100644 tools/perf/arch/arm64/util/sym-handling.c (limited to 'tools/perf/util') diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 0a7782c61209..789956f76d85 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,6 +1,5 @@ perf-y += header.o perf-y += perf_regs.o -perf-y += sym-handling.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c deleted file mode 100644 index 8dfa3e5229f1..000000000000 --- a/tools/perf/arch/arm64/util/sym-handling.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * - * Copyright (C) 2015 Naveen N. Rao, IBM Corporation - */ - -#include "symbol.h" // for the elf__needs_adjust_symbols() prototype -#include - -#ifdef HAVE_LIBELF_SUPPORT -#include - -bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) -{ - return ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - ehdr.e_type == ET_DYN; -} -#endif diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7cf0b8803097..e5c9504f8586 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,4 @@ perf-y += header.o -perf-y += sym-handling.o perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index abb7a12d8f93..0856b32f9e08 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -10,16 +10,6 @@ #include "probe-event.h" #include "probe-file.h" -#ifdef HAVE_LIBELF_SUPPORT -bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) -{ - return ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - ehdr.e_type == ET_DYN; -} - -#endif - int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 1965aefccb02..be5b493f8284 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -704,9 +704,15 @@ void symsrc__destroy(struct symsrc *ss) close(ss->fd); } -bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr) +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) { - return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL; + /* + * Usually vmlinux is an ELF file with type ET_EXEC for most + * architectures; except Arm64 kernel is linked with option + * '-share', so need to check type ET_DYN. + */ + return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; } int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, -- cgit v1.2.3-70-g09d2 From 443bc639e518c6c4e8fc2e0456cccf3a04f6e77f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 Feb 2020 12:39:37 +0800 Subject: perf report: Print al_addr when symbol is not found For branch mode, if the symbol is not found, it prints the address. For example, 0x0000555eee0365a0 in below output. Overhead Command Source Shared Object Source Symbol Target Symbol 17.55% div libc-2.27.so [.] __random [.] __random 6.11% div div [.] 0x0000555eee0365a0 [.] rand 6.10% div libc-2.27.so [.] rand [.] 0x0000555eee036769 5.80% div libc-2.27.so [.] __random_r [.] __random 5.72% div libc-2.27.so [.] __random [.] __random_r 5.62% div libc-2.27.so [.] __random_r [.] __random_r 5.38% div libc-2.27.so [.] __random [.] rand 4.56% div libc-2.27.so [.] __random [.] __random 4.49% div div [.] 0x0000555eee036779 [.] 0x0000555eee0365ff 4.25% div div [.] 0x0000555eee0365fa [.] 0x0000555eee036760 But it's not very easy to understand what the instructions are in the binary. So this patch uses the al_addr instead. With this patch, the output is Overhead Command Source Shared Object Source Symbol Target Symbol 17.55% div libc-2.27.so [.] __random [.] __random 6.11% div div [.] 0x00000000000005a0 [.] rand 6.10% div libc-2.27.so [.] rand [.] 0x0000000000000769 5.80% div libc-2.27.so [.] __random_r [.] __random 5.72% div libc-2.27.so [.] __random [.] __random_r 5.62% div libc-2.27.so [.] __random_r [.] __random_r 5.38% div libc-2.27.so [.] __random [.] rand 4.56% div libc-2.27.so [.] __random [.] __random 4.49% div div [.] 0x0000000000000779 [.] 0x00000000000005ff 4.25% div div [.] 0x00000000000005fa [.] 0x0000000000000760 Now we can use objdump to dump the object starting from 0x5a0. For example, objdump -d --start-address 0x5a0 div 00000000000005a0 : 5a0: ff 25 2a 0a 20 00 jmpq *0x200a2a(%rip) # 200fd0 <__cxa_finalize@plt+0x200a20> 5a6: 68 02 00 00 00 pushq $0x2 5ab: e9 c0 ff ff ff jmpq 570 ... Committer testing: [root@seventh ~]# perf record -a -b sleep 1 [root@seventh ~]# perf report --header-only | grep cpudesc # cpudesc : Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz [root@seventh ~]# perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY [root@seventh ~]# Before: [root@seventh ~]# perf report --stdio --dso libsystemd-shared-241.so | head -20 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 2K of event 'cycles' # Event count (approx.): 2240 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ............... ........................ ...................... ...................... .................. # 0.13% systemd-journal libc-2.29.so [.] cfree@GLIBC_2.2.5 [.] _int_free 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465c82 [.] 0x00007fe406465d80 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465ded [.] 0x00007fe406465c30 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465e4e [.] 0x00007fe406465de0 1 0.09% systemd-journal systemd-journald [.] free@plt [.] cfree@GLIBC_2.2.5 1 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 18 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 2 0.04% systemd libsystemd-shared-241.so [.] bus_resolve@plt [.] bus_resolve 204 0.04% systemd libsystemd-shared-241.so [.] getpid_cached@plt [.] getpid_cached 7 [root@seventh ~]# After: [root@seventh ~]# perf report --stdio --dso libsystemd-shared-241.so | head -20 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 2K of event 'cycles' # Event count (approx.): 2240 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ............... ........................ ...................... ...................... .................. # 0.13% systemd-journal libc-2.29.so [.] cfree@GLIBC_2.2.5 [.] _int_free 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7c82 [.] 0x00000000000f7d80 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7ded [.] 0x00000000000f7c30 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7e4e [.] 0x00000000000f7de0 1 0.09% systemd-journal systemd-journald [.] free@plt [.] cfree@GLIBC_2.2.5 1 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 18 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 2 0.04% systemd libsystemd-shared-241.so [.] bus_resolve@plt [.] bus_resolve 204 0.04% systemd libsystemd-shared-241.so [.] getpid_cached@plt [.] getpid_cached 7 [root@seventh ~]# Lets use -v to get full paths and then try objdump on the unresolved address: [root@seventh ~]# perf report -v --stdio --dso libsystemd-shared-241.so |& grep libsystemd-shared-241.so | tail -1 0.04% systemd-journal /usr/lib/systemd/libsystemd-shared-241.so 0x80c1a B [.] 0x0000000000080c1a 0x80a95 B [.] 0x0000000000080a95 61 [root@seventh ~]# [root@seventh ~]# objdump -d --start-address 0x00000000000f7d80 /usr/lib/systemd/libsystemd-shared-241.so | head -20 /usr/lib/systemd/libsystemd-shared-241.so: file format elf64-x86-64 Disassembly of section .text: 00000000000f7d80 : f7d80: 41 39 11 cmp %edx,(%r9) f7d83: 0f 84 ff fe ff ff je f7c88 f7d89: 4c 8d 05 97 09 0c 00 lea 0xc0997(%rip),%r8 # 1b8727 f7d90: b9 49 00 00 00 mov $0x49,%ecx f7d95: 48 8d 15 c9 f5 0b 00 lea 0xbf5c9(%rip),%rdx # 1b7365 f7d9c: 31 ff xor %edi,%edi f7d9e: 48 8d 35 9b ff 0b 00 lea 0xbff9b(%rip),%rsi # 1b7d40 f7da5: e8 a6 d6 f4 ff callq 45450 f7daa: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) f7db0: 41 56 push %r14 f7db2: 41 55 push %r13 f7db4: 41 54 push %r12 f7db6: 55 push %rbp [root@seventh ~]# If we tried the the reported address before this patch: [root@seventh ~]# objdump -d --start-address 0x00007fe406465d80 /usr/lib/systemd/libsystemd-shared-241.so | head -20 /usr/lib/systemd/libsystemd-shared-241.so: file format elf64-x86-64 [root@seventh ~]# Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200227043939.4403-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ab0cfd790ad0..e860595576c2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -869,7 +869,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf, if (he->branch_info) { struct addr_map_symbol *from = &he->branch_info->from; - return _hist_entry__sym_snprintf(&from->ms, from->addr, he->level, bf, size, width); + return _hist_entry__sym_snprintf(&from->ms, from->al_addr, + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -881,7 +882,8 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, if (he->branch_info) { struct addr_map_symbol *to = &he->branch_info->to; - return _hist_entry__sym_snprintf(&to->ms, to->addr, he->level, bf, size, width); + return _hist_entry__sym_snprintf(&to->ms, to->al_addr, + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); -- cgit v1.2.3-70-g09d2 From 7b0a0dcb64705d1dbed46d33c9810251667469b9 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 Feb 2020 12:39:38 +0800 Subject: perf report: Support interactive annotation of code without symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For perf report on stripped binaries it is currently impossible to do annotation. The annotation state is all tied to symbols, but there are either no symbols, or symbols are not covering all the code. We should support the annotation functionality even without symbols. This patch fakes a symbol and the symbol name is the string of address. After that, we just follow current annotation working flow. For example, 1. perf report Overhead Command Shared Object Symbol 20.67% div libc-2.27.so [.] __random_r 17.29% div libc-2.27.so [.] __random 10.59% div div [.] 0x0000000000000628 9.25% div div [.] 0x0000000000000612 6.11% div div [.] 0x0000000000000645 2. Select the line of "10.59% div div [.] 0x0000000000000628" and ENTER. Annotate 0x0000000000000628 Zoom into div thread Zoom into div DSO (use the 'k' hotkey to zoom directly into the kernel) Browse map details Run scripts for samples of symbol [0x0000000000000628] Run scripts for all samples Switch to another data file in PWD Exit 3. Select the "Annotate 0x0000000000000628" and ENTER. Percent│ │ │ │ Disassembly of section .text: │ │ 0000000000000628 <.text+0x68>: │ divsd %xmm4,%xmm0 │ divsd %xmm3,%xmm1 │ movsd (%rsp),%xmm2 │ addsd %xmm1,%xmm0 │ addsd %xmm2,%xmm0 │ movsd %xmm0,(%rsp) Now we can see the dump of object starting from 0x628. v5: --- Remove the hotkey 'a' implementation from this patch. It will be moved to a separate patch. v4: --- 1. Support the hotkey 'a'. When we press 'a' on address, now it supports the annotation. 2. Change the patch title from "Support interactive annotation of code without symbols" to "perf report: Support interactive annotation of code without symbols" v3: --- Keep just the ANNOTATION_DUMMY_LEN, and remove the opts->annotate_dummy_len since it's the "maybe in future we will provide" feature. v2: --- Fix a crash issue when annotating an address in "unknown" object. The steps to reproduce this issue: perf record -e cycles:u ls perf report 75.29% ls ld-2.27.so [.] do_lookup_x 23.64% ls ld-2.27.so [.] __GI___tunables_init 1.04% ls [unknown] [k] 0xffffffff85c01210 0.03% ls ld-2.27.so [.] _start When annotating 0xffffffff85c01210, the crash happens. v2 adds checking for ms->map in add_annotate_opt(). If the object is "unknown", ms->map is NULL. Committer notes: Renamed new_annotate_sym() to symbol__new_unresolved(). Use PRIx64 to fix this issue in some 32-bit arches: ui/browsers/hists.c: In function 'symbol__new_unresolved': ui/browsers/hists.c:2474:38: error: format '%lx' expects argument of type 'long unsigned int', but argument 5 has type 'u64' {aka 'long long unsigned int'} [-Werror=format=] snprintf(name, sizeof(name), "%-#.*lx", BITS_PER_LONG / 4, addr); ~~~~~~^ ~~~~ %-#.*llx Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200227043939.4403-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 43 ++++++++++++++++++++++++++++++++++++------ tools/perf/util/annotate.h | 1 + 2 files changed, 38 insertions(+), 6 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f36dee499320..d0f9745856fd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2465,13 +2465,41 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) return 0; } +static struct symbol *symbol__new_unresolved(u64 addr, struct map *map) +{ + struct annotated_source *src; + struct symbol *sym; + char name[64]; + + snprintf(name, sizeof(name), "%.*" PRIx64, BITS_PER_LONG / 4, addr); + + sym = symbol__new(addr, ANNOTATION_DUMMY_LEN, 0, 0, name); + if (sym) { + src = symbol__hists(sym, 1); + if (!src) { + symbol__delete(sym); + return NULL; + } + + dso__insert_symbol(map->dso, sym); + } + + return sym; +} + static int add_annotate_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, - struct map_symbol *ms) + struct map_symbol *ms, + u64 addr) { - if (ms->sym == NULL || ms->map->dso->annotate_warned || - symbol__annotation(ms->sym)->src == NULL) + if (!ms->map || !ms->map->dso || ms->map->dso->annotate_warned) + return 0; + + if (!ms->sym) + ms->sym = symbol__new_unresolved(addr, ms->map); + + if (ms->sym == NULL || symbol__annotation(ms->sym)->src == NULL) return 0; if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0) @@ -3219,17 +3247,20 @@ do_hotkey: // key came straight from options ui__popup_menu() nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - &bi->from.ms); + &bi->from.ms, + bi->from.al_addr); if (bi->to.ms.sym != bi->from.ms.sym) nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - &bi->to.ms); + &bi->to.ms, + bi->to.al_addr); } else { nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection); + browser->selection, + browser->he_selection->ip); } skip_annotation: nr_options += add_thread_opt(browser, &actions[nr_options], diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 07c775938d46..2d88069d6428 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -74,6 +74,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); #define ANNOTATION__CYCLES_WIDTH 6 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 #define ANNOTATION__AVG_IPC_WIDTH 36 +#define ANNOTATION_DUMMY_LEN 256 struct annotation_options { bool hide_src_code, -- cgit v1.2.3-70-g09d2 From 429a5f9d89fc52256b2a59cd1277afbfafb739b3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 20 Feb 2020 09:36:14 +0800 Subject: perf report: Allow specifying event to be used as sort key in --group output When performing "perf report --group", it shows the event group information together. By default, the output is sorted by the first event in group. It would be nice for user to select any event for sorting. This patch introduces a new option "--group-sort-idx" to sort the output by the event at the index n in event group. For example, Before: # perf report --group --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, BR_MISP_RETIRED.ALL_BRANCHES:pp, cpu/event=0xc0,umask=1,cmask=1, # Event count (approx.): 6451235635 # # Overhead Command Shared Object Symbol # ................................ ......... ....................... ................................... # 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 1.56% 0.01% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494ce 1.56% 0.00% 0.00% 0.00% mgen [kernel.kallsyms] [k] task_tick_fair 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 0.00% 0.03% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] g_main_context_check 0.00% 0.03% 0.00% 0.00% swapper [kernel.kallsyms] [k] apic_timer_interrupt ... After: # perf report --group --stdio --group-sort-idx 3 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, BR_MISP_RETIRED.ALL_BRANCHES:pp, cpu/event=0xc0,umask=1,cmask=1, # Event count (approx.): 6451235635 # # Overhead Command Shared Object Symbol # ................................ ......... ....................... ................................... # 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 0.00% 0.00% 0.00% 0.06% swapper [kernel.kallsyms] [k] hrtimer_start_range_ns 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] update_curr 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] apic_timer_interrupt 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] native_apic_msr_eoi_write 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] __update_load_avg_se 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] scheduler_tick Now the output is sorted by the fourth event in group. v7: --- Rebase to latest perf/core, no other change. v4: --- 1. Update Documentation/perf-report.txt to mention '--group-sort-idx' support multiple groups with different amount of events and it should be used on grouped events. 2. Update __hpp__group_sort_idx(), just return when the idx is out of limit. 3. Return failure on symbol_conf.group_sort_idx && !session->evlist->nr_groups. So now we don't need to use together with --group. v3: --- Refine the code in __hpp__group_sort_idx(). Before: for (i = 1; i < nr_members; i++) { if (i == idx) { ret = field_cmp(fields_a[i], fields_b[i]); if (ret) goto out; } } After: if (idx >= 1 && idx < nr_members) { ret = field_cmp(fields_a[idx], fields_b[idx]); if (ret) goto out; } Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200220013616.19916-2-yao.jin@linux.intel.com [ Renamed pair_fields_alloc() to hist_entry__new_pair() and combined decl + assignment of vars ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 5 ++ tools/perf/builtin-report.c | 10 ++++ tools/perf/ui/hist.c | 93 ++++++++++++++++++++++++++------ tools/perf/util/symbol_conf.h | 1 + 4 files changed, 94 insertions(+), 15 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index bd0a029d4c08..e56e3f1344a7 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -377,6 +377,11 @@ OPTIONS Show event group information together. It forces group output also if there are no groups defined in data file. +--group-sort-idx:: + Sort the output by the event at the index n in group. If n is invalid, + sort by the first event. It can support multiple groups with different + amount of events. WARNING: This should be used on grouped events. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5f4045df76f4..fa21c5ae3a51 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1227,6 +1227,10 @@ int cmd_report(int argc, const char **argv) "Show a column with the sum of periods"), OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, "Show event group information together"), + OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx, + "Sort the output by the event at the index n in group. " + "If n is invalid, sort by the first event. " + "WARNING: should be used on grouped events."), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for per branch histogram filling", parse_branch_mode), @@ -1369,6 +1373,12 @@ repeat: setup_forced_leader(&report, session->evlist); + if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) { + parse_options_usage(NULL, options, "group-sort-idx", 0); + ret = -EINVAL; + goto error; + } + if (itrace_synth_opts.last_branch) has_br_stack = true; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index f73675500061..025f4c7f96bf 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -151,15 +151,90 @@ static int field_cmp(u64 field_a, u64 field_b) return 0; } +static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field, int nr_members, + u64 **fields_a, u64 **fields_b) +{ + u64 *fa = calloc(nr_members, sizeof(*fa)), + *fb = calloc(nr_members, sizeof(*fb)); + struct hist_entry *pair; + + if (!fa || !fb) + goto out_free; + + list_for_each_entry(pair, &a->pairs.head, pairs.node) { + struct evsel *evsel = hists_to_evsel(pair->hists); + fa[perf_evsel__group_idx(evsel)] = get_field(pair); + } + + list_for_each_entry(pair, &b->pairs.head, pairs.node) { + struct evsel *evsel = hists_to_evsel(pair->hists); + fb[perf_evsel__group_idx(evsel)] = get_field(pair); + } + + *fields_a = fa; + *fields_b = fb; + return 0; +out_free: + free(fa); + free(fb); + *fields_a = *fields_b = NULL; + return -1; +} + +static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field, int idx) +{ + struct evsel *evsel = hists_to_evsel(a->hists); + u64 *fields_a, *fields_b; + int cmp, nr_members, ret, i; + + cmp = field_cmp(get_field(a), get_field(b)); + if (!perf_evsel__is_group_event(evsel)) + return cmp; + + nr_members = evsel->core.nr_members; + if (idx < 1 || idx >= nr_members) + return cmp; + + ret = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); + if (ret) { + ret = cmp; + goto out; + } + + ret = field_cmp(fields_a[idx], fields_b[idx]); + if (ret) + goto out; + + for (i = 1; i < nr_members; i++) { + if (i != idx) { + ret = field_cmp(fields_a[i], fields_b[i]); + if (ret) + goto out; + } + } + +out: + free(fields_a); + free(fields_b); + + return ret; +} + static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, hpp_field_fn get_field) { s64 ret; int i, nr_members; struct evsel *evsel; - struct hist_entry *pair; u64 *fields_a, *fields_b; + if (symbol_conf.group_sort_idx && symbol_conf.event_group) { + return __hpp__group_sort_idx(a, b, get_field, + symbol_conf.group_sort_idx); + } + ret = field_cmp(get_field(a), get_field(b)); if (ret || !symbol_conf.event_group) return ret; @@ -169,22 +244,10 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, return ret; nr_members = evsel->core.nr_members; - fields_a = calloc(nr_members, sizeof(*fields_a)); - fields_b = calloc(nr_members, sizeof(*fields_b)); - - if (!fields_a || !fields_b) + i = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); + if (i) goto out; - list_for_each_entry(pair, &a->pairs.head, pairs.node) { - evsel = hists_to_evsel(pair->hists); - fields_a[perf_evsel__group_idx(evsel)] = get_field(pair); - } - - list_for_each_entry(pair, &b->pairs.head, pairs.node) { - evsel = hists_to_evsel(pair->hists); - fields_b[perf_evsel__group_idx(evsel)] = get_field(pair); - } - for (i = 1; i < nr_members; i++) { ret = field_cmp(fields_a[i], fields_b[i]); if (ret) diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 10f1ec3e0349..b916afb95ec5 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -73,6 +73,7 @@ struct symbol_conf { const char *symfs; int res_sample; int pad_output_len_dso; + int group_sort_idx; }; extern struct symbol_conf symbol_conf; -- cgit v1.2.3-70-g09d2 From 5e3b810aac49e960c80529e2c9aa8c101c5848ce Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 20 Feb 2020 09:36:15 +0800 Subject: perf report: Support a new key to reload the browser Sometimes we may need to reload the browser to update the output since some options are changed. This patch creates a new key K_RELOAD. Once the __cmd_report() returns K_RELOAD, it would repeat the whole process, such as, read samples from data file, sort the data and display in the browser. v5: --- 1. Fix the 'make NO_SLANG=1' error. Define K_RELOAD in util/hist.h. 2. Skip setup_sorting() in repeat path if last key is K_RELOAD. v4: --- Need to quit in perf_evsel_menu__run if key is K_RELOAD. Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200220013616.19916-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 +++--- tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/keysyms.h | 1 + tools/perf/util/hist.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa21c5ae3a51..ea673b7eb3f4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -635,7 +635,7 @@ static int report__browse_hists(struct report *rep) * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. */ - if (ret != K_SWITCH_INPUT_DATA) + if (ret != K_SWITCH_INPUT_DATA && ret != K_RELOAD) ret = 0; break; case 2: @@ -1480,7 +1480,7 @@ repeat: sort_order = sort_tmp; } - if ((last_key != K_SWITCH_INPUT_DATA) && + if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) && (setup_sorting(session->evlist) < 0)) { if (sort_order) parse_options_usage(report_usage, options, "s", 1); @@ -1559,7 +1559,7 @@ repeat: sort__setup_elide(stdout); ret = __cmd_report(&report); - if (ret == K_SWITCH_INPUT_DATA) { + if (ret == K_SWITCH_INPUT_DATA || ret == K_RELOAD) { perf_session__delete(session); last_key = K_SWITCH_INPUT_DATA; goto repeat; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1103a019d83f..9f3401fd2cf6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3495,6 +3495,7 @@ browse_hists: pos = perf_evsel__prev(pos); goto browse_hists; case K_SWITCH_INPUT_DATA: + case K_RELOAD: case 'q': case CTRL('c'): goto out; diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h index fbfac29077f2..04cc4e5c031f 100644 --- a/tools/perf/ui/keysyms.h +++ b/tools/perf/ui/keysyms.h @@ -25,5 +25,6 @@ #define K_ERROR -2 #define K_RESIZE -3 #define K_SWITCH_INPUT_DATA -4 +#define K_RELOAD -5 #endif /* _PERF_KEYSYMS_H_ */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0aa63aeb58ec..bb994e030495 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -536,6 +536,7 @@ static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, #define K_LEFT -1000 #define K_RIGHT -2000 #define K_SWITCH_INPUT_DATA -3000 +#define K_RELOAD -4000 #endif unsigned int hists__sort_list_width(struct hists *hists); -- cgit v1.2.3-70-g09d2 From d13e9e413e5b470c4364bbbce559383081201811 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Feb 2020 15:16:14 +0800 Subject: perf stat: Align the output for interval aggregation mode There is a slight misalignment in -A -I output. For example: # perf stat -e cpu/event=cpu-cycles/ -a -A -I 1000 # time CPU counts unit events 1.000440863 CPU0 1,068,388 cpu/event=cpu-cycles/ 1.000440863 CPU1 875,954 cpu/event=cpu-cycles/ 1.000440863 CPU2 3,072,538 cpu/event=cpu-cycles/ 1.000440863 CPU3 4,026,870 cpu/event=cpu-cycles/ 1.000440863 CPU4 5,919,630 cpu/event=cpu-cycles/ 1.000440863 CPU5 2,714,260 cpu/event=cpu-cycles/ 1.000440863 CPU6 2,219,240 cpu/event=cpu-cycles/ 1.000440863 CPU7 1,299,232 cpu/event=cpu-cycles/ The value of counts is not aligned with the column "counts" and the event name is not aligned with the column "events". With this patch, the output is, # perf stat -e cpu/event=cpu-cycles/ -a -A -I 1000 # time CPU counts unit events 1.000423009 CPU0 997,421 cpu/event=cpu-cycles/ 1.000423009 CPU1 1,422,042 cpu/event=cpu-cycles/ 1.000423009 CPU2 484,651 cpu/event=cpu-cycles/ 1.000423009 CPU3 525,791 cpu/event=cpu-cycles/ 1.000423009 CPU4 1,370,100 cpu/event=cpu-cycles/ 1.000423009 CPU5 442,072 cpu/event=cpu-cycles/ 1.000423009 CPU6 205,643 cpu/event=cpu-cycles/ 1.000423009 CPU7 1,302,250 cpu/event=cpu-cycles/ Now output is aligned. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200218071614.25736-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 76c6052b12e2..9e757d18d713 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -115,11 +115,11 @@ static void aggr_printout(struct perf_stat_config *config, fprintf(config->output, "S%d-D%d-C%*d%s", cpu_map__id_to_socket(id), cpu_map__id_to_die(id), - config->csv_output ? 0 : -5, + config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id), config->csv_sep); } else { - fprintf(config->output, "CPU%*d%s ", - config->csv_output ? 0 : -5, + fprintf(config->output, "CPU%*d%s", + config->csv_output ? 0 : -7, evsel__cpus(evsel)->map[id], config->csv_sep); } -- cgit v1.2.3-70-g09d2 From 58fc90fda0cc983c11c5290c7a9e992b08ac4a5c Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 21 Feb 2020 15:41:21 +0530 Subject: perf metricgroup: Fix printing event names of metric group with multiple events incase of overlapping events Commit f01642e4912b ("perf metricgroup: Support multiple events for metricgroup") introduced support for multiple events in a metric group. But with the current upstream, metric events names are not printed properly incase we try to run multiple metric groups with overlapping event. With current upstream version, incase of overlapping metric events issue is, we always start our comparision logic from start. So, the events which already matched with some metric group also take part in comparision logic. Because of that when we have overlapping events, we end up matching current metric group event with already matched one. For example, in skylake machine we have metric event CoreIPC and Instructions. Both of them need 'inst_retired.any' event value. As events in Instructions is subset of events in CoreIPC, they endup in pointing to same 'inst_retired.any' value. In skylake platform: command:# ./perf stat -M CoreIPC,Instructions -C 0 sleep 1 Performance counter stats for 'CPU(s) 0': 1,254,992,790 inst_retired.any # 1254992790.0 Instructions # 1.3 CoreIPC 977,172,805 cycles 1,254,992,756 inst_retired.any 1.000802596 seconds time elapsed command:# sudo ./perf stat -M UPI,IPC sleep 1 Performance counter stats for 'sleep 1': 948,650 uops_retired.retire_slots 866,182 inst_retired.any # 0.7 IPC 866,182 inst_retired.any 1,175,671 cpu_clk_unhalted.thread Patch fixes the issue by adding a new bool pointer 'evlist_used' to keep track of events which already matched with some group by setting it true. So, we skip all used events in list when we start comparision logic. Patch also make some changes in comparision logic, incase we get a match miss, we discard the whole match and start again with first event id in metric event. With this patch: In skylake platform: command:# ./perf stat -M CoreIPC,Instructions -C 0 sleep 1 Performance counter stats for 'CPU(s) 0': 3,348,415 inst_retired.any # 0.3 CoreIPC 11,779,026 cycles 3,348,381 inst_retired.any # 3348381.0 Instructions 1.001649056 seconds time elapsed command:# ./perf stat -M UPI,IPC sleep 1 Performance counter stats for 'sleep 1': 1,023,148 uops_retired.retire_slots # 1.1 UPI 924,976 inst_retired.any 924,976 inst_retired.any # 0.6 IPC 1,489,414 cpu_clk_unhalted.thread 1.003064672 seconds time elapsed Signed-off-by: Kajol Jain Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Anju T Sudhakar Cc: Jin Yao Cc: Kan Liang Cc: Madhavan Srinivasan Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200221101121.28920-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 49 ++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c3a8c701609a..926449a7cdbf 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -95,13 +95,16 @@ struct egroup { static struct evsel *find_evsel_group(struct evlist *perf_evlist, const char **ids, int idnum, - struct evsel **metric_events) + struct evsel **metric_events, + bool *evlist_used) { struct evsel *ev; - int i = 0; + int i = 0, j = 0; bool leader_found; evlist__for_each_entry (perf_evlist, ev) { + if (evlist_used[j++]) + continue; if (!strcmp(ev->name, ids[i])) { if (!metric_events[i]) metric_events[i] = ev; @@ -109,22 +112,17 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, if (i == idnum) break; } else { - if (i + 1 == idnum) { - /* Discard the whole match and start again */ - i = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - continue; - } - - if (!strcmp(ev->name, ids[i])) - metric_events[i] = ev; - else { - /* Discard the whole match and start again */ - i = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - continue; + /* Discard the whole match and start again */ + i = 0; + memset(metric_events, 0, + sizeof(struct evsel *) * idnum); + + if (!strcmp(ev->name, ids[i])) { + if (!metric_events[i]) + metric_events[i] = ev; + i++; + if (i == idnum) + break; } } } @@ -146,7 +144,10 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, !strcmp(ev->name, metric_events[i]->name)) { ev->metric_leader = metric_events[i]; } + j++; } + ev = metric_events[i]; + evlist_used[ev->idx] = true; } return metric_events[0]; @@ -162,6 +163,13 @@ static int metricgroup__setup_events(struct list_head *groups, int ret = 0; struct egroup *eg; struct evsel *evsel; + bool *evlist_used; + + evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool)); + if (!evlist_used) { + ret = -ENOMEM; + return ret; + } list_for_each_entry (eg, groups, nd) { struct evsel **metric_events; @@ -172,7 +180,7 @@ static int metricgroup__setup_events(struct list_head *groups, break; } evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum, - metric_events); + metric_events, evlist_used); if (!evsel) { pr_debug("Cannot resolve %s: %s\n", eg->metric_name, eg->metric_expr); @@ -196,6 +204,9 @@ static int metricgroup__setup_events(struct list_head *groups, expr->metric_events = metric_events; list_add(&expr->nd, &me->head); } + + free(evlist_used); + return ret; } -- cgit v1.2.3-70-g09d2 From e45ad701e784e0eed8a07b537b47afb302c59dab Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:15 +0800 Subject: perf pmu: Refactor pmu_add_cpu_aliases() Create pmu_add_cpu_aliases_map() from pmu_add_cpu_aliases(), so the caller can pass the map; the pmu-events test would use this since there would be no CPUID matching to a mapfile there. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 21 +++++++++++++-------- tools/perf/util/pmu.h | 3 +++ 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8b99fd312aae..c616a06a34a8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -21,7 +21,6 @@ #include "pmu.h" #include "parse-events.h" #include "header.h" -#include "pmu-events/pmu-events.h" #include "string2.h" #include "strbuf.h" #include "fncache.h" @@ -744,16 +743,11 @@ out: * to the current running CPU. Then, add all PMU events from that table * as aliases. */ -static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, + struct pmu_events_map *map) { int i; - struct pmu_events_map *map; const char *name = pmu->name; - - map = perf_pmu__find_map(pmu); - if (!map) - return; - /* * Found a matching PMU events table. Create aliases */ @@ -788,6 +782,17 @@ new_alias: } } +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +{ + struct pmu_events_map *map; + + map = perf_pmu__find_map(pmu); + if (!map) + return; + + pmu_add_cpu_aliases_map(head, pmu, map); +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 6737e3d5d568..0b4a0efae38e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -7,6 +7,7 @@ #include #include #include "parse-events.h" +#include "pmu-events/pmu-events.h" struct perf_evsel_config_term; @@ -97,6 +98,8 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); +void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, + struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); -- cgit v1.2.3-70-g09d2 From d504fae93dd61b734aefc403c7653d958aef655a Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:17 +0800 Subject: perf pmu: Add is_pmu_core() Add a function to decide whether a PMU is a core PMU. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 5 +++++ tools/perf/util/pmu.h | 1 + 2 files changed, 6 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c616a06a34a8..55129d09f19d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1400,6 +1400,11 @@ static void wordwrap(char *s, int start, int max, int corr) } } +bool is_pmu_core(const char *name) +{ + return !strcmp(name, "cpu") || is_arm_pmu_core(name); +} + void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, bool long_desc, bool details_flag, bool deprecated) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 0b4a0efae38e..b756946ae78d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -88,6 +88,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); +bool is_pmu_core(const char *name); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag, bool deprecated); -- cgit v1.2.3-70-g09d2 From 5b9a50001b2c23f90f2a21db4763f3a9599588c4 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 17 Mar 2020 19:02:18 +0800 Subject: perf pmu: Make pmu_uncore_alias_match() public The perf pmu-events test will want to use pmu_uncore_alias_match(), so make it public. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-7-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 2 +- tools/perf/util/pmu.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 55129d09f19d..616fbda7c3fc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -698,7 +698,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } -static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +bool pmu_uncore_alias_match(const char *pmu_name, const char *name) { char *tmp = NULL, *tok, *str; bool res; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b756946ae78d..5fb3f16828df 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -103,6 +103,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +bool pmu_uncore_alias_match(const char *pmu_name, const char *name); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); -- cgit v1.2.3-70-g09d2 From d74b181a028bb5a468f0c609553eff6a8fdf4887 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 24 Mar 2020 08:03:19 +0100 Subject: perf cpumap: Fix snprintf overflow check 'snprintf' returns the number of characters which would be generated for the given input. If the returned value is *greater than* or equal to the buffer size, it means that the output has been truncated. Fix the overflow test accordingly. Fixes: 7780c25bae59f ("perf tools: Allow ability to map cpus to nodes easily") Fixes: 92a7e1278005b ("perf cpumap: Add cpu__max_present_cpu()") Signed-off-by: Christophe JAILLET Suggested-by: David Laight Cc: Alexander Shishkin Cc: Don Zickus Cc: He Zhe Cc: Jan Stancek Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lore.kernel.org/lkml/20200324070319.10901-1-christophe.jaillet@wanadoo.fr Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 983b7388f22b..dc5c5e6fc502 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -317,7 +317,7 @@ static void set_max_cpu_num(void) /* get the highest possible cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -328,7 +328,7 @@ static void set_max_cpu_num(void) /* get the highest present cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -356,7 +356,7 @@ static void set_max_node_num(void) /* get the highest possible cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -441,7 +441,7 @@ int cpu__setup_cpunode_map(void) return 0; n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt); - if (n == PATH_MAX) { + if (n >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); return -1; } @@ -456,7 +456,7 @@ int cpu__setup_cpunode_map(void) continue; n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name); - if (n == PATH_MAX) { + if (n >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); continue; } -- cgit v1.2.3-70-g09d2 From 0d33b34352531ff7029c58eda2321340c0ea3f5f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 24 Mar 2020 09:54:24 +0530 Subject: perf dso: Fix dso comparison Perf gets dso details from two different sources. 1st, from builid headers in perf.data and 2nd from MMAP2 samples. Dso from buildid header does not have dso_id detail. And dso from MMAP2 samples does not have buildid information. If detail of the same dso is present at both the places, filename is common. Previously, __dsos__findnew_link_by_longname_id() used to compare only long or short names, but Commit 0e3149f86b99 ("perf dso: Move dso_id from 'struct map' to 'struct dso'") also added a dso_id comparison. Because of that, now perf is creating two different dso objects of the same file, one from buildid header (with dso_id but without buildid) and second from MMAP2 sample (with buildid but without dso_id). This is causing issues with archive, buildid-list etc subcommands. Fix this by comparing dso_id only when it's present. And incase dso is present in 'dsos' list without dso_id, inject dso_id detail as well. Before: $ sudo ./perf buildid-list -H 0000000000000000000000000000000000000000 /usr/bin/ls 0000000000000000000000000000000000000000 /usr/lib64/ld-2.30.so 0000000000000000000000000000000000000000 /usr/lib64/libc-2.30.so $ ./perf archive perf archive: no build-ids found After: $ ./perf buildid-list -H b6b1291d0cead046ed0fa5734037fa87a579adee /usr/bin/ls 641f0c90cfa15779352f12c0ec3c7a2b2b6f41e8 /usr/lib64/ld-2.30.so 675ace3ca07a0b863df01f461a7b0984c65c8b37 /usr/lib64/libc-2.30.so $ ./perf archive Now please run: $ tar xvf perf.data.tar.bz2 -C ~/.debug wherever you need to run 'perf report' on. Committer notes: Renamed is_empty_dso_id() to dso_id__empty() and inject_dso_id() to dso__inject_id() to keep namespacing consistent. Fixes: 0e3149f86b99 ("perf dso: Move dso_id from 'struct map' to 'struct dso'") Reported-by: Naveen N. Rao Signed-off-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Tested-by: Naveen N. Rao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200324042424.68366-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dsos.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 591707c69c39..939471731ea6 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -26,13 +26,29 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) return 0; } +static bool dso_id__empty(struct dso_id *id) +{ + if (!id) + return true; + + return !id->maj && !id->min && !id->ino && !id->ino_generation; +} + +static void dso__inject_id(struct dso *dso, struct dso_id *id) +{ + dso->id.maj = id->maj; + dso->id.min = id->min; + dso->id.ino = id->ino; + dso->id.ino_generation = id->ino_generation; +} + static int dso_id__cmp(struct dso_id *a, struct dso_id *b) { /* * The second is always dso->id, so zeroes if not set, assume passing * NULL for a means a zeroed id */ - if (a == NULL) + if (dso_id__empty(a) || dso_id__empty(b)) return 0; return __dso_id__cmp(a, b); @@ -249,6 +265,10 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name) static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { struct dso *dso = __dsos__find_id(dsos, name, id, false); + + if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id)) + dso__inject_id(dso, id); + return dso ? dso : __dsos__addnew_id(dsos, name, id); } -- cgit v1.2.3-70-g09d2 From 2a3d252dffe14582f238e21b09923e3772263123 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 25 Mar 2020 09:40:22 -0700 Subject: perf parse-events: Add defensive NULL check Terms may have a NULL config in which case a strcmp will SEGV. This can be reproduced with: perf stat -e '*/event=?,nr/' sleep 1 Add a NULL check to avoid this. This was caught by LLVM's libfuzzer. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200325164022.41385-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 616fbda7c3fc..ef6a63f3d386 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -984,12 +984,11 @@ static int pmu_resolve_param_term(struct parse_events_term *term, struct parse_events_term *t; list_for_each_entry(t, head_terms, list) { - if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { - if (!strcmp(t->config, term->config)) { - t->used = true; - *value = t->val.num; - return 0; - } + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM && + t->config && !strcmp(t->config, term->config)) { + t->used = true; + *value = t->val.num; + return 0; } } -- cgit v1.2.3-70-g09d2 From 460c3ed999d700fa84c953f83141f580ef456b3c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Apr 2020 09:29:52 -0300 Subject: perf python: Include rwsem.c in the pythong biding We'll need it for the cgroup patches, and its better to have it in a separate patch in case we need to later revert the cgroup patches. I.e. without this we have: [root@five ~]# perf test -v python 19: 'import perf' in python : --- start --- test child forked, pid 148447 Traceback (most recent call last): File "", line 1, in ImportError: /tmp/build/perf/python/perf.cpython-37m-x86_64-linux-gnu.so: undefined symbol: down_write test child finished with -1 ---- end ---- 'import perf' in python: FAILED! [root@five ~]# Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200403123606.GC23243@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python-ext-sources | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index e7279ea6043a..a9d9c142eb7c 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -34,3 +34,4 @@ util/string.c util/symbol_fprintf.c util/units.c util/affinity.c +util/rwsem.c -- cgit v1.2.3-70-g09d2 From ba78c1c5461c2fc2f57b777e971b3a9ec0df5666 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:30 +0900 Subject: perf tools: Basic support for CGROUP event Implement basic functionality to support cgroup tracking. Each cgroup can be identified by inode number which can be read from userspace too. The actual cgroup processing will come in the later patch. Reported-by: kernel test robot Signed-off-by: Namhyung Kim Cc: Adrian Hunter [ fix perf test failure on sampling parsing ] Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325124536.2800725-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/event.h | 7 +++++++ tools/perf/builtin-diff.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/tests/sample-parsing.c | 6 +++++- tools/perf/util/event.c | 18 ++++++++++++++++++ tools/perf/util/event.h | 6 ++++++ tools/perf/util/evsel.c | 6 ++++++ tools/perf/util/machine.c | 12 ++++++++++++ tools/perf/util/machine.h | 3 +++ tools/perf/util/perf_event_attr_fprintf.c | 2 ++ tools/perf/util/session.c | 4 ++++ tools/perf/util/synthetic-events.c | 8 ++++++++ tools/perf/util/tool.h | 1 + 13 files changed, 74 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 18106899cb4e..69b44d2cc0f5 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -105,6 +105,12 @@ struct perf_record_bpf_event { __u8 tag[BPF_TAG_SIZE]; // prog tag }; +struct perf_record_cgroup { + struct perf_event_header header; + __u64 id; + char path[PATH_MAX]; +}; + struct perf_record_sample { struct perf_event_header header; __u64 array[]; @@ -352,6 +358,7 @@ union perf_event { struct perf_record_mmap2 mmap2; struct perf_record_comm comm; struct perf_record_namespaces namespaces; + struct perf_record_cgroup cgroup; struct perf_record_fork fork; struct perf_record_lost lost; struct perf_record_lost_samples lost_samples; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 5e697cd2224a..c94a002f295e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -455,6 +455,7 @@ static struct perf_diff pdiff = { .fork = perf_event__process_fork, .lost = perf_event__process_lost, .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ea673b7eb3f4..26d8fc27e427 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1105,6 +1105,7 @@ int cmd_report(int argc, const char **argv) .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 14239e472187..61865699c3f4 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -151,6 +151,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_PHYS_ADDR) COMP(phys_addr); + if (type & PERF_SAMPLE_CGROUP) + COMP(cgroup); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -230,6 +233,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .regs = regs, }, .phys_addr = 113, + .cgroup = 114, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, @@ -336,7 +340,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index c5447ff516a2..824c038e5c33 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -54,6 +54,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_NAMESPACES] = "NAMESPACES", [PERF_RECORD_KSYMBOL] = "KSYMBOL", [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", + [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -180,6 +181,12 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp) return ret; } +size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n", + event->cgroup.id, event->cgroup.path); +} + int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -196,6 +203,14 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, return machine__process_namespaces_event(machine, event, sample); } +int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_cgroup_event(machine, event, sample); +} + int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -417,6 +432,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_NAMESPACES: ret += perf_event__fprintf_namespaces(event, fp); break; + case PERF_RECORD_CGROUP: + ret += perf_event__fprintf_cgroup(event, fp); + break; case PERF_RECORD_MMAP2: ret += perf_event__fprintf_mmap2(event, fp); break; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 3cda40a2fafc..b8289f160f07 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -135,6 +135,7 @@ struct perf_sample { u32 raw_size; u64 data_src; u64 phys_addr; + u64 cgroup; u32 flags; u16 insn_len; u8 cpumode; @@ -322,6 +323,10 @@ int perf_event__process_namespaces(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_cgroup(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -377,6 +382,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 15ccd193483f..b766eb608b97 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2267,6 +2267,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->cgroup = 0; + if (type & PERF_SAMPLE_CGROUP) { + data->cgroup = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fd14f1489802..399b4731b246 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -654,6 +654,16 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused, return err; } +int machine__process_cgroup_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_cgroup(event, stdout); + + return 0; +} + int machine__process_lost_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { @@ -1878,6 +1888,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_mmap_event(machine, event, sample); break; case PERF_RECORD_NAMESPACES: ret = machine__process_namespaces_event(machine, event, sample); break; + case PERF_RECORD_CGROUP: + ret = machine__process_cgroup_event(machine, event, sample); break; case PERF_RECORD_MMAP2: ret = machine__process_mmap2_event(machine, event, sample); break; case PERF_RECORD_FORK: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index be0a930eca89..fa1be9ea00fa 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -128,6 +128,9 @@ int machine__process_switch_event(struct machine *machine, int machine__process_namespaces_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_cgroup_event(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 355d3458d4e6..b94fa07f5d32 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,6 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), + bit_name(CGROUP), { .name = NULL, } }; #undef bit_name @@ -132,6 +133,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(ksymbol, p_unsigned); PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRf(aux_output, p_unsigned); + PRINT_ATTRf(cgroup, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 055b00abd56d..0b0bfe5bef17 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -471,6 +471,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->comm = process_event_stub; if (tool->namespaces == NULL) tool->namespaces = process_event_stub; + if (tool->cgroup == NULL) + tool->cgroup = process_event_stub; if (tool->fork == NULL) tool->fork = process_event_stub; if (tool->exit == NULL) @@ -1436,6 +1438,8 @@ static int machines__deliver_event(struct machines *machines, return tool->comm(tool, event, sample, machine); case PERF_RECORD_NAMESPACES: return tool->namespaces(tool, event, sample, machine); + case PERF_RECORD_CGROUP: + return tool->cgroup(tool, event, sample, machine); case PERF_RECORD_FORK: return tool->fork(tool, event, sample, machine); case PERF_RECORD_EXIT: diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3f28af39f9c6..f72d80999506 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1230,6 +1230,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_PHYS_ADDR) result += sizeof(u64); + if (type & PERF_SAMPLE_CGROUP) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1404,6 +1407,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_CGROUP) { + *array = sample->cgroup; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 2abbf668b8de..472ef5eb4068 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -46,6 +46,7 @@ struct perf_tool { mmap2, comm, namespaces, + cgroup, fork, exit, lost, -- cgit v1.2.3-70-g09d2 From d1277aa36bff4bfc1a187a469fc6a6a1d17cf59c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:31 +0900 Subject: perf cgroup: Maintain cgroup hierarchy Each cgroup is kept in the perf_env's cgroup_tree sorted by the cgroup id. Hist entries have cgroup id can compare it directly and later it can be used to find a group name using this tree. Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325124536.2800725-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cgroup.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cgroup.h | 17 +++++++--- tools/perf/util/env.c | 2 ++ tools/perf/util/env.h | 6 ++++ tools/perf/util/machine.c | 9 +++++- 5 files changed, 109 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 5bc9d3b01bd9..b73fb7823048 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -191,3 +191,83 @@ int parse_cgroups(const struct option *opt, const char *str, } return 0; } + +static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id, + bool create, const char *path) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct cgroup *cgrp; + + while (*p != NULL) { + parent = *p; + cgrp = rb_entry(parent, struct cgroup, node); + + if (cgrp->id == id) + return cgrp; + + if (cgrp->id < id) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + if (!create) + return NULL; + + cgrp = malloc(sizeof(*cgrp)); + if (cgrp == NULL) + return NULL; + + cgrp->name = strdup(path); + if (cgrp->name == NULL) { + free(cgrp); + return NULL; + } + + cgrp->fd = -1; + cgrp->id = id; + refcount_set(&cgrp->refcnt, 1); + + rb_link_node(&cgrp->node, parent, p); + rb_insert_color(&cgrp->node, root); + + return cgrp; +} + +struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, + const char *path) +{ + struct cgroup *cgrp; + + down_write(&env->cgroups.lock); + cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path); + up_write(&env->cgroups.lock); + return cgrp; +} + +struct cgroup *cgroup__find(struct perf_env *env, uint64_t id) +{ + struct cgroup *cgrp; + + down_read(&env->cgroups.lock); + cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL); + up_read(&env->cgroups.lock); + return cgrp; +} + +void perf_env__purge_cgroups(struct perf_env *env) +{ + struct rb_node *node; + struct cgroup *cgrp; + + down_write(&env->cgroups.lock); + while (!RB_EMPTY_ROOT(&env->cgroups.tree)) { + node = rb_first(&env->cgroups.tree); + cgrp = rb_entry(node, struct cgroup, node); + + rb_erase(node, &env->cgroups.tree); + cgroup__put(cgrp); + } + up_write(&env->cgroups.lock); +} diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 2ec11f01090d..e98d5975fe55 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -3,16 +3,19 @@ #define __CGROUP_H__ #include +#include +#include "util/env.h" struct option; struct cgroup { - char *name; - int fd; - refcount_t refcnt; + struct rb_node node; + u64 id; + char *name; + int fd; + refcount_t refcnt; }; - extern int nr_cgroups; /* number of explicit cgroups defined */ struct cgroup *cgroup__get(struct cgroup *cgroup); @@ -26,4 +29,10 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); int parse_cgroups(const struct option *opt, const char *str, int unset); +struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, + const char *path); +struct cgroup *cgroup__find(struct perf_env *env, uint64_t id); + +void perf_env__purge_cgroups(struct perf_env *env); + #endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4154f944f474..fadc59708ece 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -6,6 +6,7 @@ #include #include #include "bpf-event.h" +#include "cgroup.h" #include #include #include @@ -168,6 +169,7 @@ void perf_env__exit(struct perf_env *env) int i; perf_env__purge_bpf(env); + perf_env__purge_cgroups(env); zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 11d05ae3606a..7632075a8792 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -88,6 +88,12 @@ struct perf_env { u32 btfs_cnt; } bpf_progs; + /* same reason as above (for perf-top) */ + struct { + struct rw_semaphore lock; + struct rb_root tree; + } cgroups; + /* For fast cpu to numa node lookup via perf_env__numa_node */ int *numa_map; int nr_numa_map; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 399b4731b246..97142e9671be 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -33,6 +33,7 @@ #include "asm/bug.h" #include "bpf-event.h" #include // page_size +#include "cgroup.h" #include #include @@ -654,13 +655,19 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused, return err; } -int machine__process_cgroup_event(struct machine *machine __maybe_unused, +int machine__process_cgroup_event(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct cgroup *cgrp; + if (dump_trace) perf_event__fprintf_cgroup(event, stdout); + cgrp = cgroup__findnew(machine->env, event->cgroup.id, event->cgroup.path); + if (cgrp == NULL) + return -ENOMEM; + return 0; } -- cgit v1.2.3-70-g09d2 From b629f3e9d01b4e4ad4e84c8f76fad514967a83da Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:32 +0900 Subject: perf report: Add 'cgroup' sort key The cgroup sort key is to show cgroup membership of each task. Currently it shows full path in the cgroupfs (not relative to the root of cgroup namespace) since it'd be more intuitive IMHO. Otherwise root cgroup in different namespaces will all show same name - "/". The cgroup sort key should come before cgroup_id otherwise sort_dimension__add() will match it to cgroup_id as it only matches with the given substring. For example it will look like following. Note that record patch adding --all-cgroups patch will come later. $ perf record -a --namespace --all-cgroups cgtest [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.208 MB perf.data (4090 samples) ] $ perf report -s cgroup_id,cgroup,pid ... # Overhead cgroup id (dev/inode) Cgroup Pid:Command # ........ ..................... .......... ............... # 93.96% 0/0x0 / 0:swapper 1.25% 3/0xeffffffb / 278:looper0 0.86% 3/0xf000015f /sub/cgrp1 280:cgtest 0.37% 3/0xf0000160 /sub/cgrp2 281:cgtest 0.34% 3/0xf0000163 /sub/cgrp3 282:cgtest 0.22% 3/0xeffffffb /sub 278:looper0 0.20% 3/0xeffffffb / 280:cgtest 0.15% 3/0xf0000163 /sub/cgrp3 285:looper3 Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325124536.2800725-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.c | 13 +++++++++++ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 5 files changed, 54 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index e56e3f1344a7..f569b9ea4002 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -95,6 +95,7 @@ OPTIONS abort cost. This is the global weight. - local_weight: Local weight version of the weight above. - cgroup_id: ID derived from cgroup namespace device and inode numbers. + - cgroup: cgroup pathname in the cgroupfs. - transaction: Transaction abort flags. - overhead: Overhead percentage of sample - overhead_sys: Overhead percentage of sample running in system mode diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e74a5acf66d9..283a69ff6a3d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -10,6 +10,7 @@ #include "mem-events.h" #include "session.h" #include "namespaces.h" +#include "cgroup.h" #include "sort.h" #include "units.h" #include "evlist.h" @@ -194,6 +195,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + hists__new_col_len(hists, HISTC_CGROUP, 6); hists__new_col_len(hists, HISTC_CGROUP_ID, 20); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); @@ -222,6 +224,16 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->trace_output) hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output)); + + if (h->cgroup) { + const char *cgrp_name = "unknown"; + struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env, + h->cgroup); + if (cgrp != NULL) + cgrp_name = cgrp->name; + + hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name)); + } } void hists__output_recalc_col_len(struct hists *hists, int max_rows) @@ -691,6 +703,7 @@ __hists__add_entry(struct hists *hists, .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0, .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0, }, + .cgroup = sample->cgroup, .ms = { .maps = al->maps, .map = al->map, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index bb994e030495..4141295a66fa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -38,6 +38,7 @@ enum hist_column { HISTC_THREAD, HISTC_COMM, HISTC_CGROUP_ID, + HISTC_CGROUP, HISTC_PARENT, HISTC_CPU, HISTC_SOCKET, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index e860595576c2..f14cc728c358 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -12,6 +12,7 @@ #include "cacheline.h" #include "comm.h" #include "map.h" +#include "maps.h" #include "symbol.h" #include "map_symbol.h" #include "branch.h" @@ -25,6 +26,8 @@ #include "mem-events.h" #include "annotate.h" #include "time-utils.h" +#include "cgroup.h" +#include "machine.h" #include #include @@ -634,6 +637,39 @@ struct sort_entry sort_cgroup_id = { .se_width_idx = HISTC_CGROUP_ID, }; +/* --sort cgroup */ + +static int64_t +sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->cgroup - left->cgroup; +} + +static int hist_entry__cgroup_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width __maybe_unused) +{ + const char *cgrp_name = "N/A"; + + if (he->cgroup) { + struct cgroup *cgrp = cgroup__find(he->ms.maps->machine->env, + he->cgroup); + if (cgrp != NULL) + cgrp_name = cgrp->name; + else + cgrp_name = "unknown"; + } + + return repsep_snprintf(bf, size, "%s", cgrp_name); +} + +struct sort_entry sort_cgroup = { + .se_header = "Cgroup", + .se_cmp = sort__cgroup_cmp, + .se_snprintf = hist_entry__cgroup_snprintf, + .se_width_idx = HISTC_CGROUP, +}; + /* --sort socket */ static int64_t @@ -1660,6 +1696,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_TRACE, "trace", sort_trace), DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), + DIM(SORT_CGROUP, "cgroup", sort_cgroup), DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), DIM(SORT_TIME, "time", sort_time), diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 6c862d62d052..cfa6ac6f7d06 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -101,6 +101,7 @@ struct hist_entry { struct thread *thread; struct comm *comm; struct namespace_id cgroup_id; + u64 cgroup; u64 ip; u64 transaction; s32 socket; @@ -224,6 +225,7 @@ enum sort_type { SORT_TRACE, SORT_SYM_SIZE, SORT_DSO_SIZE, + SORT_CGROUP, SORT_CGROUP_ID, SORT_SYM_IPC_NULL, SORT_TIME, -- cgit v1.2.3-70-g09d2 From ab64069f1a6604a43daec5fc4a4294b0b77a8b93 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:33 +0900 Subject: perf record: Support synthesizing cgroup events Synthesize cgroup events by iterating cgroup filesystem directories. The cgroup event only saves the portion of cgroup path after the mount point and the cgroup id (which actually is a file handle). Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325124536.2800725-7-namhyung@kernel.org Link: http://lore.kernel.org/lkml/20200402015249.3800462-1-namhyung@kernel.org [ Extracted the HAVE_FILE_HANDLE from the followup patch, added missing __maybe_unused ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 5 ++ tools/perf/util/synthetic-events.c | 122 +++++++++++++++++++++++++++++++++++++ tools/perf/util/synthetic-events.h | 1 + tools/perf/util/tool.h | 1 + 4 files changed, 129 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4c301466101b..2802de9538ff 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1397,6 +1397,11 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); + err = perf_event__synthesize_cgroups(tool, process_synthesized_event, + machine); + if (err < 0) + pr_warning("Couldn't synthesize cgroup events.\n"); + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, process_synthesized_event, opts->sample_address, 1); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index f72d80999506..a661b122d9d8 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -16,6 +16,7 @@ #include "util/synthetic-events.h" #include "util/target.h" #include "util/time-utils.h" +#include "util/cgroup.h" #include #include #include @@ -414,6 +415,127 @@ out: return rc; } +#ifdef HAVE_FILE_HANDLE +static int perf_event__synthesize_cgroup(struct perf_tool *tool, + union perf_event *event, + char *path, size_t mount_len, + perf_event__handler_t process, + struct machine *machine) +{ + size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path); + size_t path_len = strlen(path) - mount_len + 1; + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + + while (path_len % sizeof(u64)) + path[mount_len + path_len++] = '\0'; + + memset(&event->cgroup, 0, event_size); + + event->cgroup.header.type = PERF_RECORD_CGROUP; + event->cgroup.header.size = event_size + path_len + machine->id_hdr_size; + + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) { + pr_debug("stat failed: %s\n", path); + return -1; + } + + event->cgroup.id = handle.cgroup_id; + strncpy(event->cgroup.path, path + mount_len, path_len); + memset(event->cgroup.path + path_len, 0, machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) < 0) { + pr_debug("process synth event failed\n"); + return -1; + } + + return 0; +} + +static int perf_event__walk_cgroup_tree(struct perf_tool *tool, + union perf_event *event, + char *path, size_t mount_len, + perf_event__handler_t process, + struct machine *machine) +{ + size_t pos = strlen(path); + DIR *d; + struct dirent *dent; + int ret = 0; + + if (perf_event__synthesize_cgroup(tool, event, path, mount_len, + process, machine) < 0) + return -1; + + d = opendir(path); + if (d == NULL) { + pr_debug("failed to open directory: %s\n", path); + return -1; + } + + while ((dent = readdir(d)) != NULL) { + if (dent->d_type != DT_DIR) + continue; + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + /* any sane path should be less than PATH_MAX */ + if (strlen(path) + strlen(dent->d_name) + 1 >= PATH_MAX) + continue; + + if (path[pos - 1] != '/') + strcat(path, "/"); + strcat(path, dent->d_name); + + ret = perf_event__walk_cgroup_tree(tool, event, path, + mount_len, process, machine); + if (ret < 0) + break; + + path[pos] = '\0'; + } + + closedir(d); + return ret; +} + +int perf_event__synthesize_cgroups(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event; + char cgrp_root[PATH_MAX]; + size_t mount_len; /* length of mount point in the path */ + + if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) { + pr_debug("cannot find cgroup mount point\n"); + return -1; + } + + mount_len = strlen(cgrp_root); + /* make sure the path starts with a slash (after mount point) */ + strcat(cgrp_root, "/"); + + if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len, + process, machine) < 0) + return -1; + + return 0; +} +#else +int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return -1; +} +#endif + int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index baead0cdc381..e7a3e9589738 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -45,6 +45,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handl int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample); int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 472ef5eb4068..3fb67bd31e4a 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -79,6 +79,7 @@ struct perf_tool { bool ordered_events; bool ordering_requires_timestamps; bool namespace_events; + bool cgroup_events; bool no_warn; enum show_feature_header show_feat_hdr; }; -- cgit v1.2.3-70-g09d2 From 8fb4b67939e169fca68174e9ac7be79fe9a04498 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:34 +0900 Subject: perf record: Add --all-cgroups option The --all-cgroups option is to enable cgroup profiling support. It tells kernel to record CGROUP events in the ring buffer so that perf report can identify task/cgroup association later. [root@seventh ~]# perf record --all-cgroups --namespaces /wb/cgtest [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.042 MB perf.data (558 samples) ] [root@seventh ~]# perf report --stdio -s cgroup_id,cgroup,pid # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 558 of event 'cycles' # Event count (approx.): 458017341 # # Overhead cgroup id (dev/inode) Cgroup Pid:Command # ........ ..................... .......... ............... # 33.15% 4/0xeffffffb /sub 9615:looper0 32.83% 4/0xf00002f5 /sub/cgrp2 9620:looper2 32.79% 4/0xf00002f4 /sub/cgrp1 9619:looper1 0.35% 4/0xf00002f5 /sub/cgrp2 9618:cgtest 0.34% 4/0xf00002f4 /sub/cgrp1 9617:cgtest 0.32% 4/0xeffffffb / 9615:looper0 0.11% 4/0xeffffffb /sub 9617:cgtest 0.10% 4/0xeffffffb /sub 9618:cgtest # # (Tip: Sample related events with: perf record -e '{cycles,instructions}:S') # [root@seventh ~]# Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325124536.2800725-8-namhyung@kernel.org Link: http://lore.kernel.org/lkml/20200402015249.3800462-1-namhyung@kernel.org [ Extracted the HAVE_FILE_HANDLE from the followup patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 5 ++++- tools/perf/builtin-record.c | 11 +++++++++++ tools/perf/util/evsel.c | 11 ++++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/record.h | 1 + 5 files changed, 27 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b25e028458e2..b3f3b3f1c161 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -391,7 +391,10 @@ displayed with the weight and local_weight sort keys. This currently works for abort events and some memory events in precise mode on modern Intel CPUs. --namespaces:: -Record events of type PERF_RECORD_NAMESPACES. +Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key. + +--all-cgroups:: +Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key. --transaction:: Record transaction flags for transaction related events. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2802de9538ff..1ab349abe904 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1433,6 +1433,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (rec->opts.record_namespaces) tool->namespace_events = true; + if (rec->opts.record_cgroup) { +#ifdef HAVE_FILE_HANDLE + tool->cgroup_events = true; +#else + pr_err("cgroup tracking is not supported\n"); + return -1; +#endif + } + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) @@ -2363,6 +2372,8 @@ static struct option __record_options[] = { "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, "Record namespaces events"), + OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, + "Record cgroup events"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b766eb608b97..eb880efbce16 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1104,6 +1104,11 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->record_namespaces) attr->namespaces = track; + if (opts->record_cgroup) { + attr->cgroup = track && !perf_missing_features.cgroup; + perf_evsel__set_sample_bit(evsel, CGROUP); + } + if (opts->record_switch_events) attr->context_switch = track; @@ -1789,7 +1794,11 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.branch_hw_idx && + if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.branch_hw_idx && (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { perf_missing_features.branch_hw_idx = true; pr_debug2("switching off branch HW index support\n"); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 33804740e2ca..53187c501ee8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -120,6 +120,7 @@ struct perf_missing_features { bool bpf; bool aux_output; bool branch_hw_idx; + bool cgroup; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 5421fd2ad383..24316458be20 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -34,6 +34,7 @@ struct record_opts { bool auxtrace_snapshot_on_exit; bool auxtrace_sample_mode; bool record_namespaces; + bool record_cgroup; bool record_switch_events; bool all_kernel; bool all_user; -- cgit v1.2.3-70-g09d2 From d2bedb7863e9817f71fe2332a85ecdbd0f264b4b Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 25 Mar 2020 15:08:02 -0700 Subject: perf script: Allow --symbol to accept hexadecimal addresses This patch extends the perf script --symbols option to filter on hexadecimal addresses in addition to symbol names. This makes it easier to handle cases where symbols are aliased. With this patch, it is possible to mix and match symbols and hexadecimal addresses using the --symbols option. $ perf script --symbols=noploop,0x4007a0 Signed-off-by: Stephane Eranian Reviewed-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325220802.15039-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 824c038e5c33..dc0e11214ae1 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -617,10 +617,23 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->sym = map__find_symbol(al->map, al->addr); } - if (symbol_conf.sym_list && - (!al->sym || !strlist__has_entry(symbol_conf.sym_list, - al->sym->name))) { - al->filtered |= (1 << HIST_FILTER__SYMBOL); + if (symbol_conf.sym_list) { + int ret = 0; + char al_addr_str[32]; + size_t sz = sizeof(al_addr_str); + + if (al->sym) { + ret = strlist__has_entry(symbol_conf.sym_list, + al->sym->name); + } + if (!(ret && al->sym)) { + snprintf(al_addr_str, sz, "0x%"PRIx64, + al->map->unmap_ip(al->map, al->sym->start)); + ret = strlist__has_entry(symbol_conf.sym_list, + al_addr_str); + } + if (!ret) + al->filtered |= (1 << HIST_FILTER__SYMBOL); } return 0; -- cgit v1.2.3-70-g09d2 From 47327f56674d69a423f7167f8d4ea537cc1762cc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Mar 2020 10:01:47 +0200 Subject: perf events parser: Add missing Intel CPU events to parser perf list expects CPU events to be parseable by name, e.g. # perf list | grep el-capacity-read el-capacity-read OR cpu/el-capacity-read/ [Kernel PMU event] But the event parser does not recognize them that way, e.g. # perf test -v "Parse event" running test 54 'cycles//u' running test 55 'cycles:k' running test 0 'cpu/config=10,config1,config2=3,period=1000/u' running test 1 'cpu/config=1,name=krava/u,cpu/config=2/u' running test 2 'cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/' running test 3 'cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp' -> cpu/event=0,umask=0x11/ -> cpu/event=0,umask=0x13/ -> cpu/event=0x54,umask=0x1/ failed to parse event 'el-capacity-read:u,cpu/event=el-capacity-read/u', err 1, str 'parser error' event syntax error: 'el-capacity-read:u,cpu/event=el-capacity-read/u' \___ parser error test child finished with 1 ---- end ---- Parse event definition strings: FAILED! This happens because the parser splits names by '-' in order to deal with cache events. For example 'L1-dcache' is a token in parse-events.l which is matched to 'L1-dcache-load-miss' by the following rule: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config And so there is special handling for 2-part PMU names i.e. PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc but no handling for 3-part names, which are instead added as tokens e.g. topdown-[a-z-]+ While it would be possible to add a rule for 3-part names, that would not work if the first parts were also a valid PMU name e.g. 'el-capacity-read' would be matched to 'el-capacity' before the parser reached the 3rd part. The parser would need significant change to rationalize all this, so instead fix for now by adding missing Intel CPU events with 3-part names to the event parser as tokens. Missing events were found by using: grep -r EVENT_ATTR_STR arch/x86/events/intel/core.c Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lore.kernel.org/lkml/90c7ae07-c568-b6d3-f9c4-d0c1528a0610@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7b1c8ee537cf..baa48f28d57d 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -342,11 +342,13 @@ bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUT * Because the prefix cycles is mixed up with cpu-cycles. * loads and stores are mixed up with cache event */ -cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } +cycles-ct | +cycles-t | +mem-loads | +mem-stores | +topdown-[a-z-]+ | +tx-capacity-[a-z-]+ | +el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } L1-dcache|l1-d|l1d|L1-data | L1-icache|l1-i|l1i|L1-instruction | -- cgit v1.2.3-70-g09d2 From 1a4025f06059eeaecb2ef24363350ea3431568df Mon Sep 17 00:00:00 2001 From: Andreas Gerstmayr Date: Thu, 2 Apr 2020 14:54:16 +0200 Subject: perf script report: Fix SEGFAULT when using DWARF mode When running perf script report with a Python script and a callgraph in DWARF mode, intr_regs->regs can be 0 and therefore crashing the regs_map function. Added a check for this condition (same check as in builtin-script.c:595). Signed-off-by: Andreas Gerstmayr Tested-by: Kim Phillips Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lore.kernel.org/lkml/20200402125417.422232-1-agerstmayr@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 8c1b27cd8b99..2c372cf5495e 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -694,6 +694,9 @@ static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) bf[0] = 0; + if (!regs || !regs->regs) + return 0; + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; -- cgit v1.2.3-70-g09d2 From 9ff76cea4e9e6d49a6f764ae114fc0fb8de97816 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Apr 2020 09:33:59 -0300 Subject: perf python: Fix clang detection to strip out options passed in $CC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clang check in the python setup.py file expected $CC to be just the name of the compiler, not the compiler + options, i.e. all options were expected to be passed in $CFLAGS, this ends up making it fail in systems where CC is set to, e.g.: "aarch64-linaro-linux-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot" Like this: $ python3 >>> from subprocess import Popen >>> a = Popen(["aarch64-linux-gnu-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot", "-v"]) Traceback (most recent call last): File "", line 1, in File "/usr/lib/python3.6/subprocess.py", line 729, in __init__ restore_signals, start_new_session) File "/usr/lib/python3.6/subprocess.py", line 1364, in _execute_child raise child_exception_type(errno_num, err_msg, err_filename) FileNotFoundError: [Errno 2] No such file or directory: 'aarch64-linux-gnu-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot': 'aarch64-linux-gnu-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot' >>> Make it more robust, covering this case, by passing cc.split()[0] as the first arg to popen(). Fixes: a7ffd416d804 ("perf python: Fix clang detection when using CC=clang-version") Reported-by: Daniel Díaz Reported-by: Naresh Kamboju Tested-by: Daniel Díaz Cc: Adrian Hunter Cc: Ilie Halip Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200401124037.GA12534@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 8a065a6f9713..347b2c0789e4 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -3,7 +3,7 @@ from subprocess import Popen, PIPE from re import sub cc = getenv("CC") -cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() +cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() def clang_has_option(option): return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] -- cgit v1.2.3-70-g09d2