From d4953f7ef1a2e87ef732823af35361404d13fea8 Mon Sep 17 00:00:00 2001 From: Ian Rogers <irogers@google.com> Date: Sat, 14 Mar 2020 10:03:56 -0700 Subject: perf parse-events: Fix 3 use after frees found with clang ASAN Reproducible with a clang asan build and then running perf test in particular 'Parse event definition strings'. Signed-off-by: Ian Rogers <irogers@google.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200314170356.62914-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/evsel.c | 1 + tools/perf/util/parse-events.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 816d930d774e..15ccd193483f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1287,6 +1287,7 @@ void perf_evsel__exit(struct evsel *evsel) perf_thread_map__put(evsel->core.threads); zfree(&evsel->group_name); zfree(&evsel->name); + zfree(&evsel->pmu_name); perf_evsel__object.fini(evsel); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a7dc0b096974..10107747b361 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1449,7 +1449,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats, NULL); if (evsel) { - evsel->pmu_name = name; + evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; return 0; } else { @@ -1497,7 +1497,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->snapshot = info.snapshot; evsel->metric_expr = info.metric_expr; evsel->metric_name = info.metric_name; - evsel->pmu_name = name; + evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; evsel->percore = config_term_percore(&evsel->config_terms); } @@ -1547,7 +1547,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (!parse_events_add_pmu(parse_state, list, pmu->name, head, true, true)) { - pr_debug("%s -> %s/%s/\n", config, + pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; } -- cgit v1.2.3-70-g09d2 From 7eec00a74720d35b6d89505350e5b08ecef376c0 Mon Sep 17 00:00:00 2001 From: Leo Yan <leo.yan@linaro.org> Date: Fri, 6 Mar 2020 09:57:58 +0800 Subject: perf symbols: Consolidate symbol fixup issue After copying Arm64's perf archive with object files and perf.data file to x86 laptop, the x86's perf kernel symbol resolution fails. It outputs 'unknown' for all symbols parsing. This issue is root caused by the function elf__needs_adjust_symbols(), x86 perf tool uses one weak version, Arm64 (and powerpc) has rewritten their own version. elf__needs_adjust_symbols() decides if need to parse symbols with the relative offset address; but x86 building uses the weak function which misses to check for the elf type 'ET_DYN', so that it cannot parse symbols in Arm DSOs due to the wrong result from elf__needs_adjust_symbols(). The DSO parsing should not depend on any specific architecture perf building; e.g. x86 perf tool can parse Arm and Arm64 DSOs, vice versa. And confirmed by Naveen N. Rao that powerpc64 kernels are not being built as ET_DYN anymore and change to ET_EXEC. This patch removes the arch specific functions for Arm64 and powerpc and changes elf__needs_adjust_symbols() as a common function. In the common elf__needs_adjust_symbols(), it checks an extra condition 'ET_DYN' for elf header type. With this fixing, the Arm64 DSO can be parsed properly with x86's perf tool. Before: # perf script main 3258 1 branches: 0 [unknown] ([unknown]) => ffff800010c4665c [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c46670 [unknown] ([kernel.kallsyms]) => ffff800010c4eaec [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eaec [unknown] ([kernel.kallsyms]) => ffff800010c4eb00 [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eb08 [unknown] ([kernel.kallsyms]) => ffff800010c4e780 [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4e7a0 [unknown] ([kernel.kallsyms]) => ffff800010c4eeac [unknown] ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eebc [unknown] ([kernel.kallsyms]) => ffff800010c4ed80 [unknown] ([kernel.kallsyms]) After: # perf script main 3258 1 branches: 0 [unknown] ([unknown]) => ffff800010c4665c coresight_timeout+0x54 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c46670 coresight_timeout+0x68 ([kernel.kallsyms]) => ffff800010c4eaec etm4_enable_hw+0x3cc ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eaec etm4_enable_hw+0x3cc ([kernel.kallsyms]) => ffff800010c4eb00 etm4_enable_hw+0x3e0 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eb08 etm4_enable_hw+0x3e8 ([kernel.kallsyms]) => ffff800010c4e780 etm4_enable_hw+0x60 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4e7a0 etm4_enable_hw+0x80 ([kernel.kallsyms]) => ffff800010c4eeac etm4_enable+0x2d4 ([kernel.kallsyms]) main 3258 1 branches: ffff800010c4eebc etm4_enable+0x2e4 ([kernel.kallsyms]) => ffff800010c4ed80 etm4_enable+0x1a8 ([kernel.kallsyms]) v3: Changed to check for ET_DYN across all architectures. v2: Fixed Arm64 and powerpc native building. Reported-by: Mike Leach <mike.leach@linaro.org> Signed-off-by: Leo Yan <leo.yan@linaro.org> Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Allison Randal <allison@lohutok.net> Cc: Enrico Weigelt <info@metux.net> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com> Cc: John Garry <john.garry@huawei.com> Cc: Kate Stewart <kstewart@linuxfoundation.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Richter <tmricht@linux.vnet.ibm.com> Link: http://lore.kernel.org/lkml/20200306015759.10084-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/arch/arm64/util/Build | 1 - tools/perf/arch/arm64/util/sym-handling.c | 19 ------------------- tools/perf/arch/powerpc/util/Build | 1 - tools/perf/arch/powerpc/util/sym-handling.c | 10 ---------- tools/perf/util/symbol-elf.c | 10 ++++++++-- 5 files changed, 8 insertions(+), 33 deletions(-) delete mode 100644 tools/perf/arch/arm64/util/sym-handling.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 0a7782c61209..789956f76d85 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,6 +1,5 @@ perf-y += header.o perf-y += perf_regs.o -perf-y += sym-handling.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c deleted file mode 100644 index 8dfa3e5229f1..000000000000 --- a/tools/perf/arch/arm64/util/sym-handling.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * - * Copyright (C) 2015 Naveen N. Rao, IBM Corporation - */ - -#include "symbol.h" // for the elf__needs_adjust_symbols() prototype -#include <stdbool.h> - -#ifdef HAVE_LIBELF_SUPPORT -#include <gelf.h> - -bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) -{ - return ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - ehdr.e_type == ET_DYN; -} -#endif diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7cf0b8803097..e5c9504f8586 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,4 @@ perf-y += header.o -perf-y += sym-handling.o perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index abb7a12d8f93..0856b32f9e08 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -10,16 +10,6 @@ #include "probe-event.h" #include "probe-file.h" -#ifdef HAVE_LIBELF_SUPPORT -bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) -{ - return ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - ehdr.e_type == ET_DYN; -} - -#endif - int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 1965aefccb02..be5b493f8284 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -704,9 +704,15 @@ void symsrc__destroy(struct symsrc *ss) close(ss->fd); } -bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr) +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) { - return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL; + /* + * Usually vmlinux is an ELF file with type ET_EXEC for most + * architectures; except Arm64 kernel is linked with option + * '-share', so need to check type ET_DYN. + */ + return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; } int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, -- cgit v1.2.3-70-g09d2 From 443bc639e518c6c4e8fc2e0456cccf3a04f6e77f Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Thu, 27 Feb 2020 12:39:37 +0800 Subject: perf report: Print al_addr when symbol is not found For branch mode, if the symbol is not found, it prints the address. For example, 0x0000555eee0365a0 in below output. Overhead Command Source Shared Object Source Symbol Target Symbol 17.55% div libc-2.27.so [.] __random [.] __random 6.11% div div [.] 0x0000555eee0365a0 [.] rand 6.10% div libc-2.27.so [.] rand [.] 0x0000555eee036769 5.80% div libc-2.27.so [.] __random_r [.] __random 5.72% div libc-2.27.so [.] __random [.] __random_r 5.62% div libc-2.27.so [.] __random_r [.] __random_r 5.38% div libc-2.27.so [.] __random [.] rand 4.56% div libc-2.27.so [.] __random [.] __random 4.49% div div [.] 0x0000555eee036779 [.] 0x0000555eee0365ff 4.25% div div [.] 0x0000555eee0365fa [.] 0x0000555eee036760 But it's not very easy to understand what the instructions are in the binary. So this patch uses the al_addr instead. With this patch, the output is Overhead Command Source Shared Object Source Symbol Target Symbol 17.55% div libc-2.27.so [.] __random [.] __random 6.11% div div [.] 0x00000000000005a0 [.] rand 6.10% div libc-2.27.so [.] rand [.] 0x0000000000000769 5.80% div libc-2.27.so [.] __random_r [.] __random 5.72% div libc-2.27.so [.] __random [.] __random_r 5.62% div libc-2.27.so [.] __random_r [.] __random_r 5.38% div libc-2.27.so [.] __random [.] rand 4.56% div libc-2.27.so [.] __random [.] __random 4.49% div div [.] 0x0000000000000779 [.] 0x00000000000005ff 4.25% div div [.] 0x00000000000005fa [.] 0x0000000000000760 Now we can use objdump to dump the object starting from 0x5a0. For example, objdump -d --start-address 0x5a0 div 00000000000005a0 <rand@plt>: 5a0: ff 25 2a 0a 20 00 jmpq *0x200a2a(%rip) # 200fd0 <__cxa_finalize@plt+0x200a20> 5a6: 68 02 00 00 00 pushq $0x2 5ab: e9 c0 ff ff ff jmpq 570 <srand@plt-0x10> ... Committer testing: [root@seventh ~]# perf record -a -b sleep 1 [root@seventh ~]# perf report --header-only | grep cpudesc # cpudesc : Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz [root@seventh ~]# perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY [root@seventh ~]# Before: [root@seventh ~]# perf report --stdio --dso libsystemd-shared-241.so | head -20 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 2K of event 'cycles' # Event count (approx.): 2240 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ............... ........................ ...................... ...................... .................. # 0.13% systemd-journal libc-2.29.so [.] cfree@GLIBC_2.2.5 [.] _int_free 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465c82 [.] 0x00007fe406465d80 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465ded [.] 0x00007fe406465c30 1 0.09% systemd libsystemd-shared-241.so [.] 0x00007fe406465e4e [.] 0x00007fe406465de0 1 0.09% systemd-journal systemd-journald [.] free@plt [.] cfree@GLIBC_2.2.5 1 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 18 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 2 0.04% systemd libsystemd-shared-241.so [.] bus_resolve@plt [.] bus_resolve 204 0.04% systemd libsystemd-shared-241.so [.] getpid_cached@plt [.] getpid_cached 7 [root@seventh ~]# After: [root@seventh ~]# perf report --stdio --dso libsystemd-shared-241.so | head -20 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 2K of event 'cycles' # Event count (approx.): 2240 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ............... ........................ ...................... ...................... .................. # 0.13% systemd-journal libc-2.29.so [.] cfree@GLIBC_2.2.5 [.] _int_free 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7c82 [.] 0x00000000000f7d80 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7ded [.] 0x00000000000f7c30 1 0.09% systemd libsystemd-shared-241.so [.] 0x00000000000f7e4e [.] 0x00000000000f7de0 1 0.09% systemd-journal systemd-journald [.] free@plt [.] cfree@GLIBC_2.2.5 1 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 18 0.09% systemd-journal libc-2.29.so [.] _int_free [.] _int_free 2 0.04% systemd libsystemd-shared-241.so [.] bus_resolve@plt [.] bus_resolve 204 0.04% systemd libsystemd-shared-241.so [.] getpid_cached@plt [.] getpid_cached 7 [root@seventh ~]# Lets use -v to get full paths and then try objdump on the unresolved address: [root@seventh ~]# perf report -v --stdio --dso libsystemd-shared-241.so |& grep libsystemd-shared-241.so | tail -1 0.04% systemd-journal /usr/lib/systemd/libsystemd-shared-241.so 0x80c1a B [.] 0x0000000000080c1a 0x80a95 B [.] 0x0000000000080a95 61 [root@seventh ~]# [root@seventh ~]# objdump -d --start-address 0x00000000000f7d80 /usr/lib/systemd/libsystemd-shared-241.so | head -20 /usr/lib/systemd/libsystemd-shared-241.so: file format elf64-x86-64 Disassembly of section .text: 00000000000f7d80 <proc_cmdline_parse_given@@SD_SHARED+0x330>: f7d80: 41 39 11 cmp %edx,(%r9) f7d83: 0f 84 ff fe ff ff je f7c88 <proc_cmdline_parse_given@@SD_SHARED+0x238> f7d89: 4c 8d 05 97 09 0c 00 lea 0xc0997(%rip),%r8 # 1b8727 <utf8_skip_data@@SD_SHARED+0x3147> f7d90: b9 49 00 00 00 mov $0x49,%ecx f7d95: 48 8d 15 c9 f5 0b 00 lea 0xbf5c9(%rip),%rdx # 1b7365 <utf8_skip_data@@SD_SHARED+0x1d85> f7d9c: 31 ff xor %edi,%edi f7d9e: 48 8d 35 9b ff 0b 00 lea 0xbff9b(%rip),%rsi # 1b7d40 <utf8_skip_data@@SD_SHARED+0x2760> f7da5: e8 a6 d6 f4 ff callq 45450 <log_assert_failed_realm@plt> f7daa: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) f7db0: 41 56 push %r14 f7db2: 41 55 push %r13 f7db4: 41 54 push %r12 f7db6: 55 push %rbp [root@seventh ~]# If we tried the the reported address before this patch: [root@seventh ~]# objdump -d --start-address 0x00007fe406465d80 /usr/lib/systemd/libsystemd-shared-241.so | head -20 /usr/lib/systemd/libsystemd-shared-241.so: file format elf64-x86-64 [root@seventh ~]# Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200227043939.4403-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/sort.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ab0cfd790ad0..e860595576c2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -869,7 +869,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf, if (he->branch_info) { struct addr_map_symbol *from = &he->branch_info->from; - return _hist_entry__sym_snprintf(&from->ms, from->addr, he->level, bf, size, width); + return _hist_entry__sym_snprintf(&from->ms, from->al_addr, + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -881,7 +882,8 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, if (he->branch_info) { struct addr_map_symbol *to = &he->branch_info->to; - return _hist_entry__sym_snprintf(&to->ms, to->addr, he->level, bf, size, width); + return _hist_entry__sym_snprintf(&to->ms, to->al_addr, + he->level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); -- cgit v1.2.3-70-g09d2 From 7b0a0dcb64705d1dbed46d33c9810251667469b9 Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Thu, 27 Feb 2020 12:39:38 +0800 Subject: perf report: Support interactive annotation of code without symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For perf report on stripped binaries it is currently impossible to do annotation. The annotation state is all tied to symbols, but there are either no symbols, or symbols are not covering all the code. We should support the annotation functionality even without symbols. This patch fakes a symbol and the symbol name is the string of address. After that, we just follow current annotation working flow. For example, 1. perf report Overhead Command Shared Object Symbol 20.67% div libc-2.27.so [.] __random_r 17.29% div libc-2.27.so [.] __random 10.59% div div [.] 0x0000000000000628 9.25% div div [.] 0x0000000000000612 6.11% div div [.] 0x0000000000000645 2. Select the line of "10.59% div div [.] 0x0000000000000628" and ENTER. Annotate 0x0000000000000628 Zoom into div thread Zoom into div DSO (use the 'k' hotkey to zoom directly into the kernel) Browse map details Run scripts for samples of symbol [0x0000000000000628] Run scripts for all samples Switch to another data file in PWD Exit 3. Select the "Annotate 0x0000000000000628" and ENTER. Percent│ │ │ │ Disassembly of section .text: │ │ 0000000000000628 <.text+0x68>: │ divsd %xmm4,%xmm0 │ divsd %xmm3,%xmm1 │ movsd (%rsp),%xmm2 │ addsd %xmm1,%xmm0 │ addsd %xmm2,%xmm0 │ movsd %xmm0,(%rsp) Now we can see the dump of object starting from 0x628. v5: --- Remove the hotkey 'a' implementation from this patch. It will be moved to a separate patch. v4: --- 1. Support the hotkey 'a'. When we press 'a' on address, now it supports the annotation. 2. Change the patch title from "Support interactive annotation of code without symbols" to "perf report: Support interactive annotation of code without symbols" v3: --- Keep just the ANNOTATION_DUMMY_LEN, and remove the opts->annotate_dummy_len since it's the "maybe in future we will provide" feature. v2: --- Fix a crash issue when annotating an address in "unknown" object. The steps to reproduce this issue: perf record -e cycles:u ls perf report 75.29% ls ld-2.27.so [.] do_lookup_x 23.64% ls ld-2.27.so [.] __GI___tunables_init 1.04% ls [unknown] [k] 0xffffffff85c01210 0.03% ls ld-2.27.so [.] _start When annotating 0xffffffff85c01210, the crash happens. v2 adds checking for ms->map in add_annotate_opt(). If the object is "unknown", ms->map is NULL. Committer notes: Renamed new_annotate_sym() to symbol__new_unresolved(). Use PRIx64 to fix this issue in some 32-bit arches: ui/browsers/hists.c: In function 'symbol__new_unresolved': ui/browsers/hists.c:2474:38: error: format '%lx' expects argument of type 'long unsigned int', but argument 5 has type 'u64' {aka 'long long unsigned int'} [-Werror=format=] snprintf(name, sizeof(name), "%-#.*lx", BITS_PER_LONG / 4, addr); ~~~~~~^ ~~~~ %-#.*llx Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200227043939.4403-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/ui/browsers/hists.c | 43 ++++++++++++++++++++++++++++++++++++------ tools/perf/util/annotate.h | 1 + 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f36dee499320..d0f9745856fd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2465,13 +2465,41 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) return 0; } +static struct symbol *symbol__new_unresolved(u64 addr, struct map *map) +{ + struct annotated_source *src; + struct symbol *sym; + char name[64]; + + snprintf(name, sizeof(name), "%.*" PRIx64, BITS_PER_LONG / 4, addr); + + sym = symbol__new(addr, ANNOTATION_DUMMY_LEN, 0, 0, name); + if (sym) { + src = symbol__hists(sym, 1); + if (!src) { + symbol__delete(sym); + return NULL; + } + + dso__insert_symbol(map->dso, sym); + } + + return sym; +} + static int add_annotate_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, - struct map_symbol *ms) + struct map_symbol *ms, + u64 addr) { - if (ms->sym == NULL || ms->map->dso->annotate_warned || - symbol__annotation(ms->sym)->src == NULL) + if (!ms->map || !ms->map->dso || ms->map->dso->annotate_warned) + return 0; + + if (!ms->sym) + ms->sym = symbol__new_unresolved(addr, ms->map); + + if (ms->sym == NULL || symbol__annotation(ms->sym)->src == NULL) return 0; if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0) @@ -3219,17 +3247,20 @@ do_hotkey: // key came straight from options ui__popup_menu() nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - &bi->from.ms); + &bi->from.ms, + bi->from.al_addr); if (bi->to.ms.sym != bi->from.ms.sym) nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - &bi->to.ms); + &bi->to.ms, + bi->to.al_addr); } else { nr_options += add_annotate_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection); + browser->selection, + browser->he_selection->ip); } skip_annotation: nr_options += add_thread_opt(browser, &actions[nr_options], diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 07c775938d46..2d88069d6428 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -74,6 +74,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); #define ANNOTATION__CYCLES_WIDTH 6 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 #define ANNOTATION__AVG_IPC_WIDTH 36 +#define ANNOTATION_DUMMY_LEN 256 struct annotation_options { bool hide_src_code, -- cgit v1.2.3-70-g09d2 From ec0479a63b7657e57665a9fc81d11a99131cfc62 Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Thu, 27 Feb 2020 12:39:39 +0800 Subject: perf report/top TUI: Support hotkey 'a' for annotation of unresolved addresses In previous patch, we have supported the annotation functionality even without symbols. For this patch, it supports the hotkey 'a' on address in report view. Note that, for branch mode, we only support the annotation for "branch to" address. Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200227043939.4403-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/ui/browsers/hists.c | 46 ++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index d0f9745856fd..1103a019d83f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3061,21 +3061,45 @@ do_hotkey: // key came straight from options ui__popup_menu() continue; } - if (browser->selection == NULL || - browser->selection->sym == NULL || - browser->selection->map->dso->annotate_warned) + if (!browser->selection || + !browser->selection->map || + !browser->selection->map->dso || + browser->selection->map->dso->annotate_warned) { continue; + } - if (symbol__annotation(browser->selection->sym)->src == NULL) { - ui_browser__warning(&browser->b, delay_secs * 2, - "No samples for the \"%s\" symbol.\n\n" - "Probably appeared just in a callchain", - browser->selection->sym->name); - continue; + if (!browser->selection->sym) { + if (!browser->he_selection) + continue; + + if (sort__mode == SORT_MODE__BRANCH) { + bi = browser->he_selection->branch_info; + if (!bi || !bi->to.ms.map) + continue; + + actions->ms.sym = symbol__new_unresolved(bi->to.al_addr, bi->to.ms.map); + actions->ms.map = bi->to.ms.map; + } else { + actions->ms.sym = symbol__new_unresolved(browser->he_selection->ip, + browser->selection->map); + actions->ms.map = browser->selection->map; + } + + if (!actions->ms.sym) + continue; + } else { + if (symbol__annotation(browser->selection->sym)->src == NULL) { + ui_browser__warning(&browser->b, delay_secs * 2, + "No samples for the \"%s\" symbol.\n\n" + "Probably appeared just in a callchain", + browser->selection->sym->name); + continue; + } + + actions->ms.map = browser->selection->map; + actions->ms.sym = browser->selection->sym; } - actions->ms.map = browser->selection->map; - actions->ms.sym = browser->selection->sym; do_annotate(browser, actions); continue; case 'P': -- cgit v1.2.3-70-g09d2 From 429a5f9d89fc52256b2a59cd1277afbfafb739b3 Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Thu, 20 Feb 2020 09:36:14 +0800 Subject: perf report: Allow specifying event to be used as sort key in --group output When performing "perf report --group", it shows the event group information together. By default, the output is sorted by the first event in group. It would be nice for user to select any event for sorting. This patch introduces a new option "--group-sort-idx" to sort the output by the event at the index n in event group. For example, Before: # perf report --group --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, BR_MISP_RETIRED.ALL_BRANCHES:pp, cpu/event=0xc0,umask=1,cmask=1, # Event count (approx.): 6451235635 # # Overhead Command Shared Object Symbol # ................................ ......... ....................... ................................... # 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 1.56% 0.01% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494ce 1.56% 0.00% 0.00% 0.00% mgen [kernel.kallsyms] [k] task_tick_fair 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 0.00% 0.03% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] g_main_context_check 0.00% 0.03% 0.00% 0.00% swapper [kernel.kallsyms] [k] apic_timer_interrupt ... After: # perf report --group --stdio --group-sort-idx 3 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, BR_MISP_RETIRED.ALL_BRANCHES:pp, cpu/event=0xc0,umask=1,cmask=1, # Event count (approx.): 6451235635 # # Overhead Command Shared Object Symbol # ................................ ......... ....................... ................................... # 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 0.00% 0.00% 0.00% 0.06% swapper [kernel.kallsyms] [k] hrtimer_start_range_ns 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] update_curr 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] apic_timer_interrupt 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] native_apic_msr_eoi_write 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] __update_load_avg_se 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] scheduler_tick Now the output is sorted by the fourth event in group. v7: --- Rebase to latest perf/core, no other change. v4: --- 1. Update Documentation/perf-report.txt to mention '--group-sort-idx' support multiple groups with different amount of events and it should be used on grouped events. 2. Update __hpp__group_sort_idx(), just return when the idx is out of limit. 3. Return failure on symbol_conf.group_sort_idx && !session->evlist->nr_groups. So now we don't need to use together with --group. v3: --- Refine the code in __hpp__group_sort_idx(). Before: for (i = 1; i < nr_members; i++) { if (i == idx) { ret = field_cmp(fields_a[i], fields_b[i]); if (ret) goto out; } } After: if (idx >= 1 && idx < nr_members) { ret = field_cmp(fields_a[idx], fields_b[idx]); if (ret) goto out; } Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200220013616.19916-2-yao.jin@linux.intel.com [ Renamed pair_fields_alloc() to hist_entry__new_pair() and combined decl + assignment of vars ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-report.txt | 5 ++ tools/perf/builtin-report.c | 10 ++++ tools/perf/ui/hist.c | 93 ++++++++++++++++++++++++++------ tools/perf/util/symbol_conf.h | 1 + 4 files changed, 94 insertions(+), 15 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index bd0a029d4c08..e56e3f1344a7 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -377,6 +377,11 @@ OPTIONS Show event group information together. It forces group output also if there are no groups defined in data file. +--group-sort-idx:: + Sort the output by the event at the index n in group. If n is invalid, + sort by the first event. It can support multiple groups with different + amount of events. WARNING: This should be used on grouped events. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5f4045df76f4..fa21c5ae3a51 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1227,6 +1227,10 @@ int cmd_report(int argc, const char **argv) "Show a column with the sum of periods"), OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, "Show event group information together"), + OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx, + "Sort the output by the event at the index n in group. " + "If n is invalid, sort by the first event. " + "WARNING: should be used on grouped events."), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for per branch histogram filling", parse_branch_mode), @@ -1369,6 +1373,12 @@ repeat: setup_forced_leader(&report, session->evlist); + if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) { + parse_options_usage(NULL, options, "group-sort-idx", 0); + ret = -EINVAL; + goto error; + } + if (itrace_synth_opts.last_branch) has_br_stack = true; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index f73675500061..025f4c7f96bf 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -151,15 +151,90 @@ static int field_cmp(u64 field_a, u64 field_b) return 0; } +static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field, int nr_members, + u64 **fields_a, u64 **fields_b) +{ + u64 *fa = calloc(nr_members, sizeof(*fa)), + *fb = calloc(nr_members, sizeof(*fb)); + struct hist_entry *pair; + + if (!fa || !fb) + goto out_free; + + list_for_each_entry(pair, &a->pairs.head, pairs.node) { + struct evsel *evsel = hists_to_evsel(pair->hists); + fa[perf_evsel__group_idx(evsel)] = get_field(pair); + } + + list_for_each_entry(pair, &b->pairs.head, pairs.node) { + struct evsel *evsel = hists_to_evsel(pair->hists); + fb[perf_evsel__group_idx(evsel)] = get_field(pair); + } + + *fields_a = fa; + *fields_b = fb; + return 0; +out_free: + free(fa); + free(fb); + *fields_a = *fields_b = NULL; + return -1; +} + +static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field, int idx) +{ + struct evsel *evsel = hists_to_evsel(a->hists); + u64 *fields_a, *fields_b; + int cmp, nr_members, ret, i; + + cmp = field_cmp(get_field(a), get_field(b)); + if (!perf_evsel__is_group_event(evsel)) + return cmp; + + nr_members = evsel->core.nr_members; + if (idx < 1 || idx >= nr_members) + return cmp; + + ret = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); + if (ret) { + ret = cmp; + goto out; + } + + ret = field_cmp(fields_a[idx], fields_b[idx]); + if (ret) + goto out; + + for (i = 1; i < nr_members; i++) { + if (i != idx) { + ret = field_cmp(fields_a[i], fields_b[i]); + if (ret) + goto out; + } + } + +out: + free(fields_a); + free(fields_b); + + return ret; +} + static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, hpp_field_fn get_field) { s64 ret; int i, nr_members; struct evsel *evsel; - struct hist_entry *pair; u64 *fields_a, *fields_b; + if (symbol_conf.group_sort_idx && symbol_conf.event_group) { + return __hpp__group_sort_idx(a, b, get_field, + symbol_conf.group_sort_idx); + } + ret = field_cmp(get_field(a), get_field(b)); if (ret || !symbol_conf.event_group) return ret; @@ -169,22 +244,10 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, return ret; nr_members = evsel->core.nr_members; - fields_a = calloc(nr_members, sizeof(*fields_a)); - fields_b = calloc(nr_members, sizeof(*fields_b)); - - if (!fields_a || !fields_b) + i = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b); + if (i) goto out; - list_for_each_entry(pair, &a->pairs.head, pairs.node) { - evsel = hists_to_evsel(pair->hists); - fields_a[perf_evsel__group_idx(evsel)] = get_field(pair); - } - - list_for_each_entry(pair, &b->pairs.head, pairs.node) { - evsel = hists_to_evsel(pair->hists); - fields_b[perf_evsel__group_idx(evsel)] = get_field(pair); - } - for (i = 1; i < nr_members; i++) { ret = field_cmp(fields_a[i], fields_b[i]); if (ret) diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 10f1ec3e0349..b916afb95ec5 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -73,6 +73,7 @@ struct symbol_conf { const char *symfs; int res_sample; int pad_output_len_dso; + int group_sort_idx; }; extern struct symbol_conf symbol_conf; -- cgit v1.2.3-70-g09d2 From 5e3b810aac49e960c80529e2c9aa8c101c5848ce Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Thu, 20 Feb 2020 09:36:15 +0800 Subject: perf report: Support a new key to reload the browser Sometimes we may need to reload the browser to update the output since some options are changed. This patch creates a new key K_RELOAD. Once the __cmd_report() returns K_RELOAD, it would repeat the whole process, such as, read samples from data file, sort the data and display in the browser. v5: --- 1. Fix the 'make NO_SLANG=1' error. Define K_RELOAD in util/hist.h. 2. Skip setup_sorting() in repeat path if last key is K_RELOAD. v4: --- Need to quit in perf_evsel_menu__run if key is K_RELOAD. Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200220013616.19916-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-report.c | 6 +++--- tools/perf/ui/browsers/hists.c | 1 + tools/perf/ui/keysyms.h | 1 + tools/perf/util/hist.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa21c5ae3a51..ea673b7eb3f4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -635,7 +635,7 @@ static int report__browse_hists(struct report *rep) * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. */ - if (ret != K_SWITCH_INPUT_DATA) + if (ret != K_SWITCH_INPUT_DATA && ret != K_RELOAD) ret = 0; break; case 2: @@ -1480,7 +1480,7 @@ repeat: sort_order = sort_tmp; } - if ((last_key != K_SWITCH_INPUT_DATA) && + if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) && (setup_sorting(session->evlist) < 0)) { if (sort_order) parse_options_usage(report_usage, options, "s", 1); @@ -1559,7 +1559,7 @@ repeat: sort__setup_elide(stdout); ret = __cmd_report(&report); - if (ret == K_SWITCH_INPUT_DATA) { + if (ret == K_SWITCH_INPUT_DATA || ret == K_RELOAD) { perf_session__delete(session); last_key = K_SWITCH_INPUT_DATA; goto repeat; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1103a019d83f..9f3401fd2cf6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3495,6 +3495,7 @@ browse_hists: pos = perf_evsel__prev(pos); goto browse_hists; case K_SWITCH_INPUT_DATA: + case K_RELOAD: case 'q': case CTRL('c'): goto out; diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h index fbfac29077f2..04cc4e5c031f 100644 --- a/tools/perf/ui/keysyms.h +++ b/tools/perf/ui/keysyms.h @@ -25,5 +25,6 @@ #define K_ERROR -2 #define K_RESIZE -3 #define K_SWITCH_INPUT_DATA -4 +#define K_RELOAD -5 #endif /* _PERF_KEYSYMS_H_ */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0aa63aeb58ec..bb994e030495 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -536,6 +536,7 @@ static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, #define K_LEFT -1000 #define K_RIGHT -2000 #define K_SWITCH_INPUT_DATA -3000 +#define K_RELOAD -4000 #endif unsigned int hists__sort_list_width(struct hists *hists); -- cgit v1.2.3-70-g09d2 From dbddf17474411f5725fc76fc7e507410f0d4077f Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Thu, 20 Feb 2020 09:36:16 +0800 Subject: perf report/top TUI: Support hotkeys to let user select any event for sorting When performing "perf report --group", it shows the event group information together. In previous patch, we have supported a new option "--group-sort-idx" to sort the output by the event at the index n in event group. It would be nice if we can use a hotkey in browser to select a event to sort. For example, # perf report --group Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, ... Overhead Command Shared Object Symbol 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 1.56% 0.01% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494ce 1.56% 0.00% 0.00% 0.00% mgen [kernel.kallsyms] [k] task_tick_fair 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 0.00% 0.03% 0.00% 0.00% gsd-color libglib-2.0.so.0.5600.4 [.] g_main_context_check 0.00% 0.03% 0.00% 0.00% swapper [kernel.kallsyms] [k] apic_timer_interrupt 0.00% 0.03% 0.00% 0.00% swapper [kernel.kallsyms] [k] check_preempt_curr When user press hotkey '3' (event index, starting from 0), it indicates to sort output by the forth event in group. Samples: 12K of events 'cpu/instructions,period=2000003/, cpu/cpu-cycles,period=200003/, ... Overhead Command Shared Object Symbol 92.19% 98.68% 0.00% 93.30% mgen mgen [.] LOOP1 0.00% 0.13% 0.00% 6.08% swapper [kernel.kallsyms] [k] intel_idle 3.12% 0.29% 0.00% 0.16% gsd-color libglib-2.0.so.0.5600.4 [.] 0x0000000000049515 0.00% 0.00% 0.00% 0.06% swapper [kernel.kallsyms] [k] hrtimer_start_range_ns 1.56% 0.03% 0.00% 0.04% gsd-color libglib-2.0.so.0.5600.4 [.] 0x00000000000494b7 0.00% 0.15% 0.00% 0.04% perf [kernel.kallsyms] [k] smp_call_function_single 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] update_curr 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] apic_timer_interrupt 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] native_apic_msr_eoi_write 0.00% 0.00% 0.00% 0.02% mgen [kernel.kallsyms] [k] __update_load_avg_se v6: --- Jiri provided a good improvement to eliminate unneeded refresh. This improvement is added to v6. v2: --- 1. Report warning at helpline when index is invalid. 2. Report warning at helpline when it's not group event. 3. Use "case '0' ... '9'" to refine the code 4. Split K_RELOAD implementation to another patch. Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200220013616.19916-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/ui/browsers/hists.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 9f3401fd2cf6..95ac5e2ea949 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2992,7 +2992,8 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, "s Switch to another data file in PWD\n" "t Zoom into current Thread\n" "V Verbose (DSO names in callchains, etc)\n" - "/ Filter symbol by name"; + "/ Filter symbol by name\n" + "0-9 Sort by event n in group"; static const char top_help[] = HIST_BROWSER_HELP_COMMON "P Print histograms to perf.hist.N\n" "t Zoom into current Thread\n" @@ -3053,6 +3054,31 @@ do_hotkey: // key came straight from options ui__popup_menu() * go to the next or previous */ goto out_free_stack; + case '0' ... '9': + if (!symbol_conf.event_group || + evsel->core.nr_members < 2) { + snprintf(buf, sizeof(buf), + "Sort by index only available with group events!"); + helpline = buf; + continue; + } + + if (key - '0' == symbol_conf.group_sort_idx) + continue; + + symbol_conf.group_sort_idx = key - '0'; + + if (symbol_conf.group_sort_idx >= evsel->core.nr_members) { + snprintf(buf, sizeof(buf), + "Max event group index to sort is %d (index from 0 to %d)", + evsel->core.nr_members - 1, + evsel->core.nr_members - 1); + helpline = buf; + continue; + } + + key = K_RELOAD; + goto out_free_stack; case 'a': if (!hists__has(hists, sym)) { ui_browser__warning(&browser->b, delay_secs * 2, -- cgit v1.2.3-70-g09d2 From d13e9e413e5b470c4364bbbce559383081201811 Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Tue, 18 Feb 2020 15:16:14 +0800 Subject: perf stat: Align the output for interval aggregation mode There is a slight misalignment in -A -I output. For example: # perf stat -e cpu/event=cpu-cycles/ -a -A -I 1000 # time CPU counts unit events 1.000440863 CPU0 1,068,388 cpu/event=cpu-cycles/ 1.000440863 CPU1 875,954 cpu/event=cpu-cycles/ 1.000440863 CPU2 3,072,538 cpu/event=cpu-cycles/ 1.000440863 CPU3 4,026,870 cpu/event=cpu-cycles/ 1.000440863 CPU4 5,919,630 cpu/event=cpu-cycles/ 1.000440863 CPU5 2,714,260 cpu/event=cpu-cycles/ 1.000440863 CPU6 2,219,240 cpu/event=cpu-cycles/ 1.000440863 CPU7 1,299,232 cpu/event=cpu-cycles/ The value of counts is not aligned with the column "counts" and the event name is not aligned with the column "events". With this patch, the output is, # perf stat -e cpu/event=cpu-cycles/ -a -A -I 1000 # time CPU counts unit events 1.000423009 CPU0 997,421 cpu/event=cpu-cycles/ 1.000423009 CPU1 1,422,042 cpu/event=cpu-cycles/ 1.000423009 CPU2 484,651 cpu/event=cpu-cycles/ 1.000423009 CPU3 525,791 cpu/event=cpu-cycles/ 1.000423009 CPU4 1,370,100 cpu/event=cpu-cycles/ 1.000423009 CPU5 442,072 cpu/event=cpu-cycles/ 1.000423009 CPU6 205,643 cpu/event=cpu-cycles/ 1.000423009 CPU7 1,302,250 cpu/event=cpu-cycles/ Now output is aligned. Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200218071614.25736-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/stat-display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 76c6052b12e2..9e757d18d713 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -115,11 +115,11 @@ static void aggr_printout(struct perf_stat_config *config, fprintf(config->output, "S%d-D%d-C%*d%s", cpu_map__id_to_socket(id), cpu_map__id_to_die(id), - config->csv_output ? 0 : -5, + config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id), config->csv_sep); } else { - fprintf(config->output, "CPU%*d%s ", - config->csv_output ? 0 : -5, + fprintf(config->output, "CPU%*d%s", + config->csv_output ? 0 : -7, evsel__cpus(evsel)->map[id], config->csv_sep); } -- cgit v1.2.3-70-g09d2 From 58fc90fda0cc983c11c5290c7a9e992b08ac4a5c Mon Sep 17 00:00:00 2001 From: Kajol Jain <kjain@linux.ibm.com> Date: Fri, 21 Feb 2020 15:41:21 +0530 Subject: perf metricgroup: Fix printing event names of metric group with multiple events incase of overlapping events Commit f01642e4912b ("perf metricgroup: Support multiple events for metricgroup") introduced support for multiple events in a metric group. But with the current upstream, metric events names are not printed properly incase we try to run multiple metric groups with overlapping event. With current upstream version, incase of overlapping metric events issue is, we always start our comparision logic from start. So, the events which already matched with some metric group also take part in comparision logic. Because of that when we have overlapping events, we end up matching current metric group event with already matched one. For example, in skylake machine we have metric event CoreIPC and Instructions. Both of them need 'inst_retired.any' event value. As events in Instructions is subset of events in CoreIPC, they endup in pointing to same 'inst_retired.any' value. In skylake platform: command:# ./perf stat -M CoreIPC,Instructions -C 0 sleep 1 Performance counter stats for 'CPU(s) 0': 1,254,992,790 inst_retired.any # 1254992790.0 Instructions # 1.3 CoreIPC 977,172,805 cycles 1,254,992,756 inst_retired.any 1.000802596 seconds time elapsed command:# sudo ./perf stat -M UPI,IPC sleep 1 Performance counter stats for 'sleep 1': 948,650 uops_retired.retire_slots 866,182 inst_retired.any # 0.7 IPC 866,182 inst_retired.any 1,175,671 cpu_clk_unhalted.thread Patch fixes the issue by adding a new bool pointer 'evlist_used' to keep track of events which already matched with some group by setting it true. So, we skip all used events in list when we start comparision logic. Patch also make some changes in comparision logic, incase we get a match miss, we discard the whole match and start again with first event id in metric event. With this patch: In skylake platform: command:# ./perf stat -M CoreIPC,Instructions -C 0 sleep 1 Performance counter stats for 'CPU(s) 0': 3,348,415 inst_retired.any # 0.3 CoreIPC 11,779,026 cycles 3,348,381 inst_retired.any # 3348381.0 Instructions 1.001649056 seconds time elapsed command:# ./perf stat -M UPI,IPC sleep 1 Performance counter stats for 'sleep 1': 1,023,148 uops_retired.retire_slots # 1.1 UPI 924,976 inst_retired.any 924,976 inst_retired.any # 0.6 IPC 1,489,414 cpu_clk_unhalted.thread 1.003064672 seconds time elapsed Signed-off-by: Kajol Jain <kjain@linux.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Anju T Sudhakar <anju@linux.vnet.ibm.com> Cc: Jin Yao <yao.jin@linux.intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Link: http://lore.kernel.org/lkml/20200221101121.28920-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/metricgroup.c | 49 ++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c3a8c701609a..926449a7cdbf 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -95,13 +95,16 @@ struct egroup { static struct evsel *find_evsel_group(struct evlist *perf_evlist, const char **ids, int idnum, - struct evsel **metric_events) + struct evsel **metric_events, + bool *evlist_used) { struct evsel *ev; - int i = 0; + int i = 0, j = 0; bool leader_found; evlist__for_each_entry (perf_evlist, ev) { + if (evlist_used[j++]) + continue; if (!strcmp(ev->name, ids[i])) { if (!metric_events[i]) metric_events[i] = ev; @@ -109,22 +112,17 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, if (i == idnum) break; } else { - if (i + 1 == idnum) { - /* Discard the whole match and start again */ - i = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - continue; - } - - if (!strcmp(ev->name, ids[i])) - metric_events[i] = ev; - else { - /* Discard the whole match and start again */ - i = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - continue; + /* Discard the whole match and start again */ + i = 0; + memset(metric_events, 0, + sizeof(struct evsel *) * idnum); + + if (!strcmp(ev->name, ids[i])) { + if (!metric_events[i]) + metric_events[i] = ev; + i++; + if (i == idnum) + break; } } } @@ -146,7 +144,10 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, !strcmp(ev->name, metric_events[i]->name)) { ev->metric_leader = metric_events[i]; } + j++; } + ev = metric_events[i]; + evlist_used[ev->idx] = true; } return metric_events[0]; @@ -162,6 +163,13 @@ static int metricgroup__setup_events(struct list_head *groups, int ret = 0; struct egroup *eg; struct evsel *evsel; + bool *evlist_used; + + evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool)); + if (!evlist_used) { + ret = -ENOMEM; + return ret; + } list_for_each_entry (eg, groups, nd) { struct evsel **metric_events; @@ -172,7 +180,7 @@ static int metricgroup__setup_events(struct list_head *groups, break; } evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum, - metric_events); + metric_events, evlist_used); if (!evsel) { pr_debug("Cannot resolve %s: %s\n", eg->metric_name, eg->metric_expr); @@ -196,6 +204,9 @@ static int metricgroup__setup_events(struct list_head *groups, expr->metric_events = metric_events; list_add(&expr->nd, &me->head); } + + free(evlist_used); + return ret; } -- cgit v1.2.3-70-g09d2 From c5f18e9e94bad244115dc5e47f27bd061ecc5552 Mon Sep 17 00:00:00 2001 From: Vijay Thakkar <vijaythakkar@me.com> Date: Wed, 18 Mar 2020 15:00:00 -0400 Subject: perf vendor events amd: Restrict model detection for zen1 based processors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes the previous blanket detection of AMD Family 17h processors to be more specific to Zen1 core based products only by replacing model detection regex pattern [[:xdigit:]]+ with ([12][0-9A-F]|[0-9A-F]), restricting to models 0 though 2f only. This change is required to allow for the addition of separate PMU events for Zen2 core based models in the following patches as those belong to family 17h but have different PMCs. Current PMU events directory has also been renamed to "amdzen1" from "amdfam17h" to reflect this specificity. Note that although this change does not break PMU counters for existing zen1 based systems, it does disable the current set of counters for zen2 based systems. Counters for zen2 have been added in the following patches in this patchset. Signed-off-by: Vijay Thakkar <vijaythakkar@me.com> Acked-by: Kim Phillips <kim.phillips@amd.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Jon Grimm <jon.grimm@amd.com> Cc: Martin Liška <mliska@suse.cz> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200318190002.307290-2-vijaythakkar@me.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- .../perf/pmu-events/arch/x86/amdfam17h/branch.json | 12 - .../perf/pmu-events/arch/x86/amdfam17h/cache.json | 329 --------------------- tools/perf/pmu-events/arch/x86/amdfam17h/core.json | 134 --------- .../arch/x86/amdfam17h/floating-point.json | 168 ----------- .../perf/pmu-events/arch/x86/amdfam17h/memory.json | 162 ---------- .../perf/pmu-events/arch/x86/amdfam17h/other.json | 65 ---- tools/perf/pmu-events/arch/x86/amdzen1/branch.json | 12 + tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 329 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen1/core.json | 134 +++++++++ .../arch/x86/amdzen1/floating-point.json | 168 +++++++++++ tools/perf/pmu-events/arch/x86/amdzen1/memory.json | 162 ++++++++++ tools/perf/pmu-events/arch/x86/amdzen1/other.json | 65 ++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 13 files changed, 871 insertions(+), 871 deletions(-) delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/branch.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/cache.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/core.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/memory.json delete mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/other.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/branch.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/core.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen1/other.json diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json b/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json deleted file mode 100644 index 93ddfd8053ca..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "EventName": "bp_l1_btb_correct", - "EventCode": "0x8a", - "BriefDescription": "L1 BTB Correction." - }, - { - "EventName": "bp_l2_btb_correct", - "EventCode": "0x8b", - "BriefDescription": "L2 BTB Correction." - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json deleted file mode 100644 index 6221a840fcea..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json +++ /dev/null @@ -1,329 +0,0 @@ -[ - { - "EventName": "ic_fw32", - "EventCode": "0x80", - "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." - }, - { - "EventName": "ic_fw32_miss", - "EventCode": "0x81", - "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." - }, - { - "EventName": "ic_cache_fill_l2", - "EventCode": "0x82", - "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." - }, - { - "EventName": "ic_cache_fill_sys", - "EventCode": "0x83", - "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." - }, - { - "EventName": "bp_l1_tlb_miss_l2_hit", - "EventCode": "0x84", - "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." - }, - { - "EventName": "bp_l1_tlb_miss_l2_miss", - "EventCode": "0x85", - "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." - }, - { - "EventName": "bp_snp_re_sync", - "EventCode": "0x86", - "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." - }, - { - "EventName": "ic_fetch_stall.ic_stall_any", - "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "UMask": "0x4" - }, - { - "EventName": "ic_fetch_stall.ic_stall_dq_empty", - "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "UMask": "0x2" - }, - { - "EventName": "ic_fetch_stall.ic_stall_back_pressure", - "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "UMask": "0x1" - }, - { - "EventName": "ic_cache_inval.l2_invalidating_probe", - "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", - "UMask": "0x2" - }, - { - "EventName": "ic_cache_inval.fill_invalidated", - "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to overwriting fill response.", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", - "UMask": "0x1" - }, - { - "EventName": "bp_tlb_rel", - "EventCode": "0x99", - "BriefDescription": "The number of ITLB reload requests." - }, - { - "EventName": "l2_request_g1.rd_blk_l", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x80" - }, - { - "EventName": "l2_request_g1.rd_blk_x", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x40" - }, - { - "EventName": "l2_request_g1.ls_rd_blk_c_s", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x20" - }, - { - "EventName": "l2_request_g1.cacheable_ic_read", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x10" - }, - { - "EventName": "l2_request_g1.change_to_x", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x8" - }, - { - "EventName": "l2_request_g1.prefetch_l2", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x4" - }, - { - "EventName": "l2_request_g1.l2_hw_pf", - "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", - "UMask": "0x2" - }, - { - "EventName": "l2_request_g1.other_requests", - "EventCode": "0x60", - "BriefDescription": "Events covered by l2_request_g2.", - "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", - "UMask": "0x1" - }, - { - "EventName": "l2_request_g2.group1", - "EventCode": "0x61", - "BriefDescription": "All Group 1 commands not in unit0.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", - "UMask": "0x80" - }, - { - "EventName": "l2_request_g2.ls_rd_sized", - "EventCode": "0x61", - "BriefDescription": "RdSized, RdSized32, RdSized64.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", - "UMask": "0x40" - }, - { - "EventName": "l2_request_g2.ls_rd_sized_nc", - "EventCode": "0x61", - "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", - "UMask": "0x20" - }, - { - "EventName": "l2_request_g2.ic_rd_sized", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x10" - }, - { - "EventName": "l2_request_g2.ic_rd_sized_nc", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x8" - }, - { - "EventName": "l2_request_g2.smc_inval", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x4" - }, - { - "EventName": "l2_request_g2.bus_locks_originator", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x2" - }, - { - "EventName": "l2_request_g2.bus_locks_responses", - "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "UMask": "0x1" - }, - { - "EventName": "l2_latency.l2_cycles_waiting_on_fills", - "EventCode": "0x62", - "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "UMask": "0x1" - }, - { - "EventName": "l2_wcb_req.wcb_write", - "EventCode": "0x63", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", - "BriefDescription": "LS to L2 WCB write requests.", - "UMask": "0x40" - }, - { - "EventName": "l2_wcb_req.wcb_close", - "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB close requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", - "UMask": "0x20" - }, - { - "EventName": "l2_wcb_req.zero_byte_store", - "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB zero byte store requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", - "UMask": "0x4" - }, - { - "EventName": "l2_wcb_req.cl_zero", - "EventCode": "0x63", - "PublicDescription": "LS to L2 WCB cache line zeroing requests.", - "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", - "UMask": "0x1" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_cs", - "EventCode": "0x64", - "BriefDescription": "LS ReadBlock C/S Hit.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", - "UMask": "0x80" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", - "EventCode": "0x64", - "BriefDescription": "LS Read Block L Hit X.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", - "UMask": "0x40" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", - "EventCode": "0x64", - "BriefDescription": "LsRdBlkL Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", - "UMask": "0x20" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_x", - "EventCode": "0x64", - "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", - "UMask": "0x10" - }, - { - "EventName": "l2_cache_req_stat.ls_rd_blk_c", - "EventCode": "0x64", - "BriefDescription": "LS Read Block C S L X Change to X Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", - "UMask": "0x8" - }, - { - "EventName": "l2_cache_req_stat.ic_fill_hit_x", - "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Exclusive Stale.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", - "UMask": "0x4" - }, - { - "EventName": "l2_cache_req_stat.ic_fill_hit_s", - "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", - "UMask": "0x2" - }, - { - "EventName": "l2_cache_req_stat.ic_fill_miss", - "EventCode": "0x64", - "BriefDescription": "IC Fill Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", - "UMask": "0x1" - }, - { - "EventName": "l2_fill_pending.l2_fill_busy", - "EventCode": "0x6d", - "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", - "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", - "UMask": "0x1" - }, - { - "EventName": "l3_request_g1.caching_l3_cache_accesses", - "EventCode": "0x01", - "BriefDescription": "Caching: L3 cache accesses", - "UMask": "0x80", - "Unit": "L3PMC" - }, - { - "EventName": "l3_lookup_state.all_l3_req_typs", - "EventCode": "0x04", - "BriefDescription": "All L3 Request Types", - "UMask": "0xff", - "Unit": "L3PMC" - }, - { - "EventName": "l3_comb_clstr_state.other_l3_miss_typs", - "EventCode": "0x06", - "BriefDescription": "Other L3 Miss Request Types", - "UMask": "0xfe", - "Unit": "L3PMC" - }, - { - "EventName": "l3_comb_clstr_state.request_miss", - "EventCode": "0x06", - "BriefDescription": "L3 cache misses", - "UMask": "0x01", - "Unit": "L3PMC" - }, - { - "EventName": "xi_sys_fill_latency", - "EventCode": "0x90", - "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", - "UMask": "0x00", - "Unit": "L3PMC" - }, - { - "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", - "EventCode": "0x9a", - "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", - "UMask": "0x3f", - "Unit": "L3PMC" - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json deleted file mode 100644 index 1079544eeed5..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json +++ /dev/null @@ -1,134 +0,0 @@ -[ - { - "EventName": "ex_ret_instr", - "EventCode": "0xc0", - "BriefDescription": "Retired Instructions." - }, - { - "EventName": "ex_ret_cops", - "EventCode": "0xc1", - "BriefDescription": "Retired Uops.", - "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4." - }, - { - "EventName": "ex_ret_brn", - "EventCode": "0xc2", - "BriefDescription": "Retired Branch Instructions.", - "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." - }, - { - "EventName": "ex_ret_brn_misp", - "EventCode": "0xc3", - "BriefDescription": "Retired Branch Instructions Mispredicted.", - "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." - }, - { - "EventName": "ex_ret_brn_tkn", - "EventCode": "0xc4", - "BriefDescription": "Retired Taken Branch Instructions.", - "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." - }, - { - "EventName": "ex_ret_brn_tkn_misp", - "EventCode": "0xc5", - "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", - "PublicDescription": "The number of retired taken branch instructions that were mispredicted." - }, - { - "EventName": "ex_ret_brn_far", - "EventCode": "0xc6", - "BriefDescription": "Retired Far Control Transfers.", - "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." - }, - { - "EventName": "ex_ret_brn_resync", - "EventCode": "0xc7", - "BriefDescription": "Retired Branch Resyncs.", - "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." - }, - { - "EventName": "ex_ret_near_ret", - "EventCode": "0xc8", - "BriefDescription": "Retired Near Returns.", - "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." - }, - { - "EventName": "ex_ret_near_ret_mispred", - "EventCode": "0xc9", - "BriefDescription": "Retired Near Returns Mispredicted.", - "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." - }, - { - "EventName": "ex_ret_brn_ind_misp", - "EventCode": "0xca", - "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", - "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." - }, - { - "EventName": "ex_ret_mmx_fp_instr.sse_instr", - "EventCode": "0xcb", - "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "UMask": "0x4" - }, - { - "EventName": "ex_ret_mmx_fp_instr.mmx_instr", - "EventCode": "0xcb", - "BriefDescription": "MMX instructions.", - "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", - "UMask": "0x2" - }, - { - "EventName": "ex_ret_mmx_fp_instr.x87_instr", - "EventCode": "0xcb", - "BriefDescription": "x87 instructions.", - "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", - "UMask": "0x1" - }, - { - "EventName": "ex_ret_cond", - "EventCode": "0xd1", - "BriefDescription": "Retired Conditional Branch Instructions." - }, - { - "EventName": "ex_ret_cond_misp", - "EventCode": "0xd2", - "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." - }, - { - "EventName": "ex_div_busy", - "EventCode": "0xd3", - "BriefDescription": "Div Cycles Busy count." - }, - { - "EventName": "ex_div_count", - "EventCode": "0xd4", - "BriefDescription": "Div Op Count." - }, - { - "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", - "EventCode": "0x1cf", - "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "UMask": "0x4" - }, - { - "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", - "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS that retired.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", - "UMask": "0x2" - }, - { - "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", - "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", - "UMask": "0x1" - }, - { - "EventName": "ex_ret_fus_brnch_inst", - "EventCode": "0x1d0", - "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3." - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json b/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json deleted file mode 100644 index ea4711983d1d..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json +++ /dev/null @@ -1,168 +0,0 @@ -[ - { - "EventName": "fpu_pipe_assignment.dual", - "EventCode": "0x00", - "BriefDescription": "Total number multi-pipe uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", - "UMask": "0xf0" - }, - { - "EventName": "fpu_pipe_assignment.total", - "EventCode": "0x00", - "BriefDescription": "Total number uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", - "UMask": "0xf" - }, - { - "EventName": "fp_sched_empty", - "EventCode": "0x01", - "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler." - }, - { - "EventName": "fp_retx87_fp_ops.all", - "EventCode": "0x02", - "BriefDescription": "All Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", - "UMask": "0x7" - }, - { - "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", - "EventCode": "0x02", - "BriefDescription": "Divide and square root Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", - "UMask": "0x4" - }, - { - "EventName": "fp_retx87_fp_ops.mul_ops", - "EventCode": "0x02", - "BriefDescription": "Multiply Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", - "UMask": "0x2" - }, - { - "EventName": "fp_retx87_fp_ops.add_sub_ops", - "EventCode": "0x02", - "BriefDescription": "Add/subtract Ops.", - "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", - "UMask": "0x1" - }, - { - "EventName": "fp_ret_sse_avx_ops.all", - "EventCode": "0x03", - "BriefDescription": "All FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0xff" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "UMask": "0x80" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_div_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision divide/square root FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.", - "UMask": "0x40" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_mult_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision multiply FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.", - "UMask": "0x20" - }, - { - "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops", - "EventCode": "0x03", - "BriefDescription": "Double precision add/subtract FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.", - "UMask": "0x10" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops", - "EventCode": "0x03", - "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "UMask": "0x8" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_div_flops", - "EventCode": "0x03", - "BriefDescription": "Single-precision divide/square root FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", - "UMask": "0x4" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", - "EventCode": "0x03", - "BriefDescription": "Single-precision multiply FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", - "UMask": "0x2" - }, - { - "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", - "EventCode": "0x03", - "BriefDescription": "Single-precision add/subtract FLOPS.", - "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", - "UMask": "0x1" - }, - { - "EventName": "fp_num_mov_elim_scal_op.optimized", - "EventCode": "0x04", - "BriefDescription": "Number of Scalar Ops optimized.", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", - "UMask": "0x8" - }, - { - "EventName": "fp_num_mov_elim_scal_op.opt_potential", - "EventCode": "0x04", - "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", - "UMask": "0x4" - }, - { - "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", - "EventCode": "0x04", - "BriefDescription": "Number of SSE Move Ops eliminated.", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", - "UMask": "0x2" - }, - { - "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", - "EventCode": "0x04", - "BriefDescription": "Number of SSE Move Ops.", - "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", - "UMask": "0x1" - }, - { - "EventName": "fp_retired_ser_ops.x87_ctrl_ret", - "EventCode": "0x05", - "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", - "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", - "UMask": "0x8" - }, - { - "EventName": "fp_retired_ser_ops.x87_bot_ret", - "EventCode": "0x05", - "BriefDescription": "x87 bottom-executing uOps retired.", - "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", - "UMask": "0x4" - }, - { - "EventName": "fp_retired_ser_ops.sse_ctrl_ret", - "EventCode": "0x05", - "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "UMask": "0x2" - }, - { - "EventName": "fp_retired_ser_ops.sse_bot_ret", - "EventCode": "0x05", - "BriefDescription": "SSE bottom-executing uOps retired.", - "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", - "UMask": "0x1" - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json b/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json deleted file mode 100644 index fa2d60d4def0..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json +++ /dev/null @@ -1,162 +0,0 @@ -[ - { - "EventName": "ls_locks.bus_lock", - "EventCode": "0x25", - "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "UMask": "0x1" - }, - { - "EventName": "ls_dispatch.ld_st_dispatch", - "EventCode": "0x29", - "BriefDescription": "Load-op-Stores.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", - "UMask": "0x4" - }, - { - "EventName": "ls_dispatch.store_dispatch", - "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x2" - }, - { - "EventName": "ls_dispatch.ld_dispatch", - "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x1" - }, - { - "EventName": "ls_stlf", - "EventCode": "0x35", - "BriefDescription": "Number of STLF hits." - }, - { - "EventName": "ls_dc_accesses", - "EventCode": "0x40", - "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." - }, - { - "EventName": "ls_l1_d_tlb_miss.all", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", - "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", - "UMask": "0xff" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 1G size.", - "PublicDescription": "L1 DTLB Miss of a page of 1G size.", - "UMask": "0x80" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 2M size.", - "PublicDescription": "L1 DTLB Miss of a page of 2M size.", - "UMask": "0x40" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 32K size.", - "PublicDescription": "L1 DTLB Miss of a page of 32K size.", - "UMask": "0x20" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Miss of a page of 4K size.", - "PublicDescription": "L1 DTLB Miss of a page of 4K size.", - "UMask": "0x10" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 1G size.", - "PublicDescription": "L1 DTLB Reload of a page of 1G size.", - "UMask": "0x8" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 2M size.", - "PublicDescription": "L1 DTLB Reload of a page of 2M size.", - "UMask": "0x4" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 32K size.", - "PublicDescription": "L1 DTLB Reload of a page of 32K size.", - "UMask": "0x2" - }, - { - "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", - "EventCode": "0x45", - "BriefDescription": "L1 DTLB Reload of a page of 4K size.", - "PublicDescription": "L1 DTLB Reload of a page of 4K size.", - "UMask": "0x1" - }, - { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", - "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", - "UMask": "0xc" - }, - { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", - "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", - "UMask": "0x3" - }, - { - "EventName": "ls_misal_accesses", - "EventCode": "0x47", - "BriefDescription": "Misaligned loads." - }, - { - "EventName": "ls_pref_instr_disp.prefetch_nta", - "EventCode": "0x4b", - "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "UMask": "0x4" - }, - { - "EventName": "ls_pref_instr_disp.store_prefetch_w", - "EventCode": "0x4b", - "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "UMask": "0x2" - }, - { - "EventName": "ls_pref_instr_disp.load_prefetch_w", - "EventCode": "0x4b", - "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", - "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", - "UMask": "0x1" - }, - { - "EventName": "ls_inef_sw_pref.mab_mch_cnt", - "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "UMask": "0x2" - }, - { - "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", - "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "UMask": "0x1" - }, - { - "EventName": "ls_not_halted_cyc", - "EventCode": "0x76", - "BriefDescription": "Cycles not in Halt." - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json deleted file mode 100644 index b26a00d05a2e..000000000000 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json +++ /dev/null @@ -1,65 +0,0 @@ -[ - { - "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", - "EventCode": "0x28a", - "BriefDescription": "OC to IC mode switch.", - "PublicDescription": "OC Mode Switch. OC to IC mode switch.", - "UMask": "0x2" - }, - { - "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", - "EventCode": "0x28a", - "BriefDescription": "IC to OC mode switch.", - "PublicDescription": "OC Mode Switch. IC to OC mode switch.", - "UMask": "0x1" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", - "EventCode": "0xaf", - "BriefDescription": "RETIRE Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", - "UMask": "0x40" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", - "EventCode": "0xaf", - "BriefDescription": "AGSQ Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", - "UMask": "0x20" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALU tokens total unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", - "UMask": "0x10" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", - "EventCode": "0xaf", - "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", - "UMask": "0x8" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALSQ 3 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", - "UMask": "0x4" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALSQ 2 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", - "UMask": "0x2" - }, - { - "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", - "EventCode": "0xaf", - "BriefDescription": "ALSQ 1 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", - "UMask": "0x1" - } -] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json new file mode 100644 index 000000000000..93ddfd8053ca --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json @@ -0,0 +1,12 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 BTB Correction." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 BTB Correction." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json new file mode 100644 index 000000000000..6221a840fcea --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -0,0 +1,329 @@ +[ + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_hit", + "EventCode": "0x84", + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x4" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x2" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x1" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", + "UMask": "0x2" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response.", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", + "UMask": "0x1" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." + }, + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g1.prefetch_l2", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g1.other_requests", + "EventCode": "0x60", + "BriefDescription": "Events covered by l2_request_g2.", + "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", + "UMask": "0x1" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "All Group 1 commands not in unit0.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "RdSized, RdSized32, RdSized64.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x1" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x1" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "BriefDescription": "LS to L2 WCB write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x4" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "PublicDescription": "LS to L2 WCB cache line zeroing requests.", + "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x1" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "LS ReadBlock C/S Hit.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "LS Read Block L Hit X.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkL Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "LS Read Block C S L X Change to X Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", + "UMask": "0x8" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Exclusive Stale.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", + "UMask": "0x4" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", + "UMask": "0x2" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "IC Fill Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", + "UMask": "0x1" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x1" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "UMask": "0x00", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", + "EventCode": "0x9a", + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", + "UMask": "0x3f", + "Unit": "L3PMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json new file mode 100644 index 000000000000..1079544eeed5 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -0,0 +1,134 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_cops", + "EventCode": "0xc1", + "BriefDescription": "Retired Uops.", + "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." + }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "UMask": "0x4" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x2" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_ret_cond_misp", + "EventCode": "0xd2", + "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x4" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS that retired.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x2" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_fus_brnch_inst", + "EventCode": "0x1d0", + "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json new file mode 100644 index 000000000000..ea4711983d1d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json @@ -0,0 +1,168 @@ +[ + { + "EventName": "fpu_pipe_assignment.dual", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", + "UMask": "0xf0" + }, + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", + "UMask": "0xf" + }, + { + "EventName": "fp_sched_empty", + "EventCode": "0x01", + "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler." + }, + { + "EventName": "fp_retx87_fp_ops.all", + "EventCode": "0x02", + "BriefDescription": "All Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", + "UMask": "0x7" + }, + { + "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", + "EventCode": "0x02", + "BriefDescription": "Divide and square root Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", + "UMask": "0x4" + }, + { + "EventName": "fp_retx87_fp_ops.mul_ops", + "EventCode": "0x02", + "BriefDescription": "Multiply Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", + "UMask": "0x2" + }, + { + "EventName": "fp_retx87_fp_ops.add_sub_ops", + "EventCode": "0x02", + "BriefDescription": "Add/subtract Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "UMask": "0x80" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.", + "UMask": "0x40" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.", + "UMask": "0x20" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.", + "UMask": "0x10" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "UMask": "0x8" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", + "UMask": "0x4" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", + "UMask": "0x2" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", + "UMask": "0x1" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", + "UMask": "0x8" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "UMask": "0x4" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", + "UMask": "0x2" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "UMask": "0x8" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", + "UMask": "0x4" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "UMask": "0x2" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json new file mode 100644 index 000000000000..fa2d60d4def0 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json @@ -0,0 +1,162 @@ +[ + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "UMask": "0x1" + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Load-op-Stores.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", + "UMask": "0x4" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x2" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x1" + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", + "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 1G size.", + "PublicDescription": "L1 DTLB Miss of a page of 1G size.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 2M size.", + "PublicDescription": "L1 DTLB Miss of a page of 2M size.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 32K size.", + "PublicDescription": "L1 DTLB Miss of a page of 32K size.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 4K size.", + "PublicDescription": "L1 DTLB Miss of a page of 4K size.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 1G size.", + "PublicDescription": "L1 DTLB Reload of a page of 1G size.", + "UMask": "0x8" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 2M size.", + "PublicDescription": "L1 DTLB Reload of a page of 2M size.", + "UMask": "0x4" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 32K size.", + "PublicDescription": "L1 DTLB Reload of a page of 32K size.", + "UMask": "0x2" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 4K size.", + "PublicDescription": "L1 DTLB Reload of a page of 4K size.", + "UMask": "0x1" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0xc" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0x3" + }, + { + "EventName": "ls_misal_accesses", + "EventCode": "0x47", + "BriefDescription": "Misaligned loads." + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "UMask": "0x4" + }, + { + "EventName": "ls_pref_instr_disp.store_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "UMask": "0x2" + }, + { + "EventName": "ls_pref_instr_disp.load_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", + "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", + "UMask": "0x1" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x2" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x1" + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json new file mode 100644 index 000000000000..b26a00d05a2e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json @@ -0,0 +1,65 @@ +[ + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC to IC mode switch.", + "PublicDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x2" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "IC to OC mode switch.", + "PublicDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "RETIRE Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "AGSQ Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALU tokens total unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 3 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 2 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 1 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 745ced083844..82a9db00125e 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -36,4 +36,4 @@ GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core -AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core +AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v1,amdzen1,core -- cgit v1.2.3-70-g09d2 From 2079f7aa0a49d3ae83a82f70785a28b07bb9b16b Mon Sep 17 00:00:00 2001 From: Vijay Thakkar <vijaythakkar@me.com> Date: Wed, 18 Mar 2020 15:00:01 -0400 Subject: perf vendor events amd: Add Zen2 events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds PMU events for AMD Zen2 core based processors, namely, Matisse (model 71h), Castle Peak (model 31h) and Rome (model 2xh), as documented in the AMD Processor Programming Reference for Matisse [1]. The model number regex has been set to detect all the models under family 17 that do not match those of Zen1, as the range is larger for zen2. Zen2 adds some additional counters that are not present in Zen1 and events for them have been added in this patch. Some counters have also been removed for Zen2 thatwere previously present in Zen1 and have been confirmed to always sample zero on zen2. These added/removed counters have been omitted for brevity but can be found here: https://gist.github.com/thakkarV/5b12ca5fd7488eb2c42e451e40bdd5f3 Note that PPR for Zen2 [1] does not include some counters that were documented in the PPR for Zen1 based processors [2]. After having tested these counters, some of them that still work for zen2 systems have been preserved in the events for zen2. The counters that are omitted in [1] but are still measurable and non-zero on zen2 (tested on a Ryzen 3900X system) are the following: PMC 0x000 fpu_pipe_assignment.{total|total0|total1|total2|total3} PMC 0x004 fp_num_mov_elim_scal_op.* PMC 0x046 ls_tablewalker.* PMC 0x062 l2_latency.l2_cycles_waiting_on_fills PMC 0x063 l2_wcb_req.* PMC 0x06D l2_fill_pending.l2_fill_busy PMC 0x080 ic_fw32 PMC 0x081 ic_fw32_miss PMC 0x086 bp_snp_re_sync PMC 0x087 ic_fetch_stall.* PMC 0x08C ic_cache_inval.* PMC 0x099 bp_tlb_rel PMC 0x0C7 ex_ret_brn_resync PMC 0x28A ic_oc_mode_switch.* L3PMC 0x001 l3_request_g1.* L3PMC 0x006 l3_comb_clstr_state.* [1]: Processor Programming Reference (PPR) for AMD Family 17h Model 71h, Revision B0 Processors, 56176 Rev 3.06 - Jul 17, 2019 [2]: Processor Programming Reference (PPR) for AMD Family 17h Models 01h,08h, Revision B2 Processors, 54945 Rev 3.03 - Jun 14, 2019 All of the PPRs can be found at: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Here are the results of running "fpu_pipe_assignment.total" events on my Ryzen 3900X family 17h model 71h system: Before this patch: $> perf list *fpu_pipe_assignment* List of pre-defined events (to be used in -e): After: $> perf list *fpu_pipe_assignment* floating point: fpu_pipe_assignment.total [Total number of fp uOps] fpu_pipe_assignment.total0 [Total number uOps assigned to pipe 0] fpu_pipe_assignment.total1 [Total number uOps assigned to pipe 1] fpu_pipe_assignment.total2 [Total number uOps assigned to pipe 2] fpu_pipe_assignment.total3 [Total number uOps assigned to pipe 3] Metric Groups: $> perf stat -e fpu_pipe_assignment.total sleep 1 Performance counter stats for 'sleep 1': 25,883 fpu_pipe_assignment.total 1.004145868 seconds time elapsed 0.001805000 seconds user 0.000000000 seconds sys Usage tests while running Linpackin the background: $> perf stat -I1000 -e fpu_pipe_assignment.total 1.000266796 79,313,191,516 fpu_pipe_assignment.total 2.000809630 68,091,474,430 fpu_pipe_assignment.total 3.001028115 52,925,023,174 fpu_pipe_assignment.total $> perf record -e fpu_pipe_assignment.total,fpu_pipe_assignment.total0 -a sleep 1 [ perf record: Woken up 9 times to write data ] [ perf record: Captured and wrote 4.031 MB perf.data (64764 samples) ] $> perf report --stdio --no-header | head -30 98.33% xhpl xhpl [.] dgemm_kernel 0.28% xhpl xhpl [.] dtrsm_kernel_LT 0.10% xhpl [kernel.kallsyms] [k] entry_SYSCALL_64 0.08% xhpl xhpl [.] idamax_k 0.07% baloo_file_extr liblmdb.so [.] mdb_mid2l_insert 0.06% xhpl xhpl [.] dgemm_itcopy 0.06% xhpl xhpl [.] dgemm_oncopy 0.06% xhpl [kernel.kallsyms] [k] __schedule 0.06% xhpl [kernel.kallsyms] [k] syscall_trace_enter 0.06% xhpl [kernel.kallsyms] [k] native_sched_clock 0.06% xhpl [kernel.kallsyms] [k] pick_next_task_fair 0.05% xhpl xhpl [.] blas_thread_server.llvm.15009391670273914865 0.04% xhpl [kernel.kallsyms] [k] do_syscall_64 0.04% xhpl [kernel.kallsyms] [k] yield_task_fair 0.04% xhpl libpthread-2.31.so [.] __pthread_mutex_unlock_usercnt 0.03% xhpl [kernel.kallsyms] [k] cpuacct_charge 0.03% xhpl [kernel.kallsyms] [k] syscall_return_via_sysret 0.03% xhpl libc-2.31.so [.] __sched_yield 0.03% xhpl [kernel.kallsyms] [k] __calc_delta $> perf annotate --stdio2 dgemm_kernel | egrep '^ {0,2}[0-9]+' -B2 -A2 sub $0x60,%rsp mov %rbx,(%rsp) 0.00 mov %rbp,0x8(%rsp) mov %r12,0x10(%rsp) 0.00 mov %r13,0x18(%rsp) mov %r14,0x20(%rsp) mov %r15,0x28(%rsp) -- mov %rdi,%r13 mov %rsi,0x28(%rsp) 0.00 mov %rdx,%r12 vmovsd %xmm0,0x30(%rsp) shl $0x3,%r10 mov 0x28(%rsp),%rax 0.00 xor %rdx,%rdx mov $0x18,%rdi div %rdi -- nop a0: mov %r12,%rax 0.00 shl $0x3,%rax mov %r8,%rdi lea (%r8,%rax,8),%r15 -- mov %r12,%rax nop 0.00 c0: vmovups (%rdi),%ymm1 0.09 vmovups 0x20(%rdi),%ymm2 0.02 vmovups (%r15),%ymm3 0.10 vmovups %ymm1,(%rsi) 0.07 vmovups %ymm2,0x20(%rsi) 0.07 vmovups %ymm3,0x40(%rsi) 0.06 add $0x40,%rdi add $0x40,%r15 add $0x60,%rsi 0.00 dec %rax ↑ jne c0 mov %r9,%r15 -- nop 110: lea 0x80(%rsp),%rsi 0.01 add $0x60,%rsi 0.03 mov %r12,%rax 0.00 sar $0x3,%rax cmp $0x2,%rax ↓ jl d26 prefetcht0 0x200(%rdi) 0.01 vmovups -0x60(%rsi),%ymm1 0.02 prefetcht0 0xa0(%rsi) 0.00 vbroadcastsd -0x80(%rdi),%ymm0 0.00 prefetcht0 0xe0(%rsi) 0.03 vmovups -0x40(%rsi),%ymm2 0.00 prefetcht0 0x120(%rsi) vmovups -0x20(%rsi),%ymm3 vmulpd %ymm0,%ymm1,%ymm4 0.01 prefetcht0 0x160(%rsi) vmulpd %ymm0,%ymm2,%ymm8 0.01 vmulpd %ymm0,%ymm3,%ymm12 0.02 prefetcht0 0x1a0(%rsi) 0.01 vbroadcastsd -0x78(%rdi),%ymm0 vmulpd %ymm0,%ymm1,%ymm5 0.01 vmulpd %ymm0,%ymm2,%ymm9 vmulpd %ymm0,%ymm3,%ymm13 0.01 vbroadcastsd -0x70(%rdi),%ymm0 vmulpd %ymm0,%ymm1,%ymm6 0.00 vmulpd %ymm0,%ymm2,%ymm10 0.00 add $0x60,%rsi ... snip ... nop 65e0: vmovddup -0x60(%rsi),%xmm2 0.00 vmovups -0x80(%rdi),%xmm0 vmovups -0x70(%rdi),%xmm1 0.00 vmovddup -0x58(%rsi),%xmm3 vfmadd231pd %xmm0,%xmm2,%xmm4 0.00 vfmadd231pd %xmm1,%xmm2,%xmm5 0.00 vfmadd231pd %xmm0,%xmm3,%xmm6 0.00 vfmadd231pd %xmm1,%xmm3,%xmm7 0.00 add $0x10,%rsi add $0x20,%rdi 0.00 dec %rax ↑ jne 65e0 nop nop 6620: vmovddup 0x30(%rsp),%xmm0 0.00 vmulpd %xmm0,%xmm4,%xmm4 0.00 vmulpd %xmm0,%xmm5,%xmm5 vmulpd %xmm0,%xmm6,%xmm6 vmulpd %xmm0,%xmm7,%xmm7 vaddpd (%r15),%xmm4,%xmm4 vaddpd 0x10(%r15),%xmm5,%xmm5 0.00 vaddpd (%r15,%r10,1),%xmm6,%xmm6 0.00 vaddpd 0x10(%r15,%r10,1),%xmm7,%xmm7 0.00 vmovups %xmm4,(%r15) vmovups %xmm5,0x10(%r15) 0.00 vmovups %xmm6,(%r15,%r10,1) vmovups %xmm7,0x10(%r15,%r10,1) add $0x20,%r15 -- lea (%r8,%rax,8),%r8 69d8: mov 0x20(%rsp),%r14 0.00 test $0x1,%r14 ↓ je 6d84 mov %r9,%r15 -- vbroadcastsd -0x28(%rsi),%ymm3 vfmadd231pd (%rdi),%ymm0,%ymm4 0.00 vfmadd231pd 0x20(%rdi),%ymm1,%ymm5 vfmadd231pd 0x40(%rdi),%ymm2,%ymm6 vfmadd231pd 0x60(%rdi),%ymm3,%ymm7 -- vmulpd %ymm0,%ymm4,%ymm4 vaddpd (%r15),%ymm4,%ymm4 0.00 vmovups %ymm4,(%r15) add $0x20,%r15 dec %r11 -- mov %rbx,%rsp mov (%rsp),%rbx 0.01 mov 0x8(%rsp),%rbp mov 0x10(%rsp),%r12 mov 0x18(%rsp),%r13 Signed-off-by: Vijay Thakkar <vijaythakkar@me.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Kim Phillips <kim.phillips@amd.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Jon Grimm <jon.grimm@amd.com> Cc: Martin Liška <mliska@suse.cz> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200318190002.307290-3-vijaythakkar@me.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/pmu-events/arch/x86/amdzen2/branch.json | 52 ++++ tools/perf/pmu-events/arch/x86/amdzen2/cache.json | 338 ++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen2/core.json | 130 ++++++++ .../arch/x86/amdzen2/floating-point.json | 140 +++++++++ tools/perf/pmu-events/arch/x86/amdzen2/memory.json | 341 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen2/other.json | 115 +++++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 7 files changed, 1117 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/branch.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/core.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen2/other.json diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json new file mode 100644 index 000000000000..ef4166a66288 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json @@ -0,0 +1,52 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_dyn_ind_pred", + "EventCode": "0x8e", + "BriefDescription": "Dynamic Indirect Predictions.", + "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)." + }, + { + "EventName": "bp_de_redirect", + "EventCode": "0x91", + "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." + }, + { + "EventName": "bp_l1_tlb_fetch_hit", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", + "UMask": "0xFF" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if1g", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.", + "UMask": "0x4" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if2m", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.", + "UMask": "0x2" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if4k", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.", + "UMask": "0x1" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json new file mode 100644 index 000000000000..1c60bfa0f00b --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -0,0 +1,338 @@ +[ + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g1.prefetch_l2_cmd", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g1.group2", + "EventCode": "0x60", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", + "UMask": "0x1" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", + "UMask": "0x1" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x1" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x4" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x1" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", + "UMask": "0x8" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", + "UMask": "0x4" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", + "UMask": "0x2" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", + "UMask": "0x1" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x1" + }, + { + "EventName": "l2_pf_hit_l2", + "EventCode": "0x70", + "BriefDescription": "L2 prefetch hit in L2.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_hit_l3", + "EventCode": "0x71", + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_l3", + "EventCode": "0x72", + "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", + "UMask": "0xff" + }, + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_hit", + "EventCode": "0x84", + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.", + "UMask": "0xff" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.", + "UMask": "0x4" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.", + "UMask": "0x2" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.", + "UMask": "0x1" + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x4" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x2" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x1" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x2" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x1" + }, + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x2" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x1" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "UMask": "0x00", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", + "EventCode": "0x9A", + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", + "UMask": "0x3f", + "Unit": "L3PMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json new file mode 100644 index 000000000000..de89e5a44ff1 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json @@ -0,0 +1,130 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_cops", + "EventCode": "0xc1", + "BriefDescription": "Retired Uops.", + "PublicDescription": "The number of micro-ops retired. This count includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 8." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." + }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "UMask": "0x4" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x2" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_ret_cond_misp", + "EventCode": "0xd2", + "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x4" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x2" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_fus_brnch_inst", + "EventCode": "0x1d0", + "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.", + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json new file mode 100644 index 000000000000..622a0c420e46 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json @@ -0,0 +1,140 @@ +[ + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps.", + "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", + "UMask": "0xf" + }, + { + "EventName": "fpu_pipe_assignment.total3", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 3.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", + "UMask": "0x8" + }, + { + "EventName": "fpu_pipe_assignment.total2", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 2.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", + "UMask": "0x4" + }, + { + "EventName": "fpu_pipe_assignment.total1", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 1.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", + "UMask": "0x2" + }, + { + "EventName": "fpu_pipe_assignment.total0", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 0.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", + "UMask": "0x1" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.mac_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "PublicDescription": "", + "UMask": "0x8" + }, + { + "EventName": "fp_ret_sse_avx_ops.div_flops", + "EventCode": "0x03", + "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0x4" + }, + { + "EventName": "fp_ret_sse_avx_ops.mult_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0x2" + }, + { + "EventName": "fp_ret_sse_avx_ops.add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0x1" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x8" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x4" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x2" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x1" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.", + "UMask": "0x8" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "UMask": "0x4" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.", + "UMask": "0x2" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", + "UMask": "0x1" + }, + { + "EventName": "fp_disp_faults.ymm_spill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", + "UMask": "0x8" + }, + { + "EventName": "fp_disp_faults.ymm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", + "UMask": "0x4" + }, + { + "EventName": "fp_disp_faults.xmm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", + "UMask": "0x2" + }, + { + "EventName": "fp_disp_faults.x87_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json new file mode 100644 index 000000000000..715046b339cb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json @@ -0,0 +1,341 @@ +[ + { + "EventName": "ls_bad_status2.stli_other", + "EventCode": "0x24", + "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", + "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", + "UMask": "0x2" + }, + { + "EventName": "ls_locks.spec_lock_hi_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", + "UMask": "0x8" + }, + { + "EventName": "ls_locks.spec_lock_lo_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", + "UMask": "0x4" + }, + { + "EventName": "ls_locks.non_spec_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", + "UMask": "0x2" + }, + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type. Comparable to legacy bus lock.", + "UMask": "0x1" + }, + { + "EventName": "ls_ret_cl_flush", + "EventCode": "0x26", + "BriefDescription": "Number of retired CLFLUSH instructions." + }, + { + "EventName": "ls_ret_cpuid", + "EventCode": "0x27", + "BriefDescription": "Number of retired CPUID instructions." + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.", + "UMask": "0x4" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x2" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x1" + }, + { + "EventName": "ls_smi_rx", + "EventCode": "0x2B", + "BriefDescription": "Number of SMIs received." + }, + { + "EventName": "ls_int_taken", + "EventCode": "0x2C", + "BriefDescription": "Number of interrupts taken." + }, + { + "EventName": "ls_rdtsc", + "EventCode": "0x2D", + "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full", + "EventCode": "0x37", + "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full." + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "Number of accesses to the dcache for load/store references.", + "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." + }, + { + "EventName": "ls_mab_alloc.dc_prefetcher", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", + "UMask": "0x8" + }, + { + "EventName": "ls_mab_alloc.stores", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Stores.", + "UMask": "0x2" + }, + { + "EventName": "ls_mab_alloc.loads", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Loads.", + "UMask": "0x1" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from different die.", + "UMask": "0x40" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_rmt_cache", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; Remote CCX and the address's Home Node is on a different die.", + "UMask": "0x10" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.", + "UMask": "0x8" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.", + "UMask": "0x2" + }, + { + "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "All L1 DTLB Misses or Reloads.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that miss in the L2 TLB.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that miss in the L2 TLB.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page miss.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that miss the L2 TLB.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", + "UMask": "0x8" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", + "UMask": "0x4" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload hit a coalesced page.", + "UMask": "0x2" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", + "UMask": "0x1" + }, + { + "EventName": "ls_tablewalker.iside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on I-side.", + "UMask": "0xc" + }, + { + "EventName": "ls_tablewalker.ic_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 1.", + "UMask": "0x8" + }, + { + "EventName": "ls_tablewalker.ic_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 0.", + "UMask": "0x4" + }, + { + "EventName": "ls_tablewalker.dside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on D-side.", + "UMask": "0x3" + }, + { + "EventName": "ls_tablewalker.dc_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 1.", + "UMask": "0x2" + }, + { + "EventName": "ls_tablewalker.dc_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 0.", + "UMask": "0x1" + }, + { + "EventName": "ls_misal_accesses", + "EventCode": "0x47", + "BriefDescription": "Misaligned loads." + }, + { + "EventName": "ls_pref_instr_disp", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).", + "UMask": "0xff" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.", + "UMask": "0x4" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.", + "UMask": "0x2" + }, + { + "EventName": "ls_pref_instr_disp.prefetch", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", + "UMask": "0x1" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", + "UMask": "0x2" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", + "UMask": "0x40" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", + "UMask": "0x10" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. From DRAM (home node local).", + "UMask": "0x8" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).", + "UMask": "0x2" + }, + { + "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", + "UMask": "0x40" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", + "UMask": "0x10" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).", + "UMask": "0x8" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).", + "UMask": "0x2" + }, + { + "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2", + "EventCode": "0x5A", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.", + "UMask": "0x1" + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + }, + { + "EventName": "ls_tlb_flush", + "EventCode": "0x78", + "BriefDescription": "All TLB Flushes" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/other.json b/tools/perf/pmu-events/arch/x86/amdzen2/other.json new file mode 100644 index 000000000000..e94994d4a60e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/other.json @@ -0,0 +1,115 @@ +[ + { + "EventName": "de_dis_uop_queue_empty_di0", + "EventCode": "0xa9", + "BriefDescription": "Cycles where the Micro-Op Queue is empty." + }, + { + "EventName": "de_dis_uops_from_decoder", + "EventCode": "0xaa", + "BriefDescription": "Ops dispatched from either the decoders, OpCache or both.", + "UMask": "0xff" + }, + { + "EventName": "de_dis_uops_from_decoder.opcache_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Count of dispatched Ops from OpCache.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_uops_from_decoder.decoder_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Count of dispatched Ops from Decoder.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP Miscellaneous resource unavailable. Applies to the recovery of mispredicts with FP ops.", + "UMask": "0x80" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.taken_branch_buffer_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Taken branch buffer resource stall.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. SC AGU dispatch stall.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 82a9db00125e..244a36e37a3a 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -37,3 +37,4 @@ GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v1,amdzen1,core +AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core -- cgit v1.2.3-70-g09d2 From b5b8a7cf141acba6588d2a43cda0dd258299f902 Mon Sep 17 00:00:00 2001 From: Vijay Thakkar <vijaythakkar@me.com> Date: Wed, 18 Mar 2020 15:00:02 -0400 Subject: perf vendor events amd: Update Zen1 events to V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates the PMCs for AMD Zen1 core based processors (Family 17h; Models 0 through 2F) to be in accordance with PMCs as documented in the latest versions of the AMD Processor Programming Reference [1], [2] and [3]. Note that some events, such as FPU pipe assignment are missing in [1], and therefore [3] is included for full coverage of events. PMCs added: fpu_pipe_assignment.dual{0|1|2|3} fpu_pipe_assignment.total{0|1|2|3} ls_mab_alloc.dc_prefetcher ls_mab_alloc.stores ls_mab_alloc.loads bp_dyn_ind_pred bp_de_redirect PMC removed: ex_ret_cond_misp Cumulative counts, fpu_pipe_assignment.total and fpu_pipe_assignment.dual, existed in v1, but did expose port-level counters. ex_ret_cond_misp has been removed as it has been removed from the latest versions of the PPR, and when tested, always seems to sample zero as tested on a Ryzen 3400G system. [1]: Processor Programming Reference (PPR) for AMD Family 17h Models 01h,08h, Revision B2 Processors, 54945 Rev 3.03 - Jun 14, 2019. [2]: Processor Programming Reference (PPR) for AMD Family 17h Model 18h, Revision B1 Processors, 55570-B1 Rev 3.14 - Sep 26, 2019. [3]: OSRR for AMD Family 17h processors, Models 00h-2Fh, 56255 Rev 3.03 - July, 2018 All of the PPRs can be found at: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Vijay Thakkar <vijaythakkar@me.com> Acked-by: Kim Phillips <kim.phillips@amd.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Jon Grimm <jon.grimm@amd.com> Cc: Martin Liška <mliska@suse.cz> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: vijay thakkar <vijaythakkar@me.com> Link: http://lore.kernel.org/lkml/20200318190002.307290-4-vijaythakkar@me.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/pmu-events/arch/x86/amdzen1/branch.json | 11 +++ tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 107 +++++++-------------- tools/perf/pmu-events/arch/x86/amdzen1/core.json | 15 +-- .../arch/x86/amdzen1/floating-point.json | 64 +++++++++++- tools/perf/pmu-events/arch/x86/amdzen1/memory.json | 82 ++++++++++------ tools/perf/pmu-events/arch/x86/amdzen1/other.json | 27 ++---- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 7 files changed, 172 insertions(+), 136 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json index 93ddfd8053ca..a9943eeb8d6b 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json @@ -8,5 +8,16 @@ "EventName": "bp_l2_btb_correct", "EventCode": "0x8b", "BriefDescription": "L2 BTB Correction." + }, + { + "EventName": "bp_dyn_ind_pred", + "EventCode": "0x8e", + "BriefDescription": "Dynamic Indirect Predictions.", + "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)." + }, + { + "EventName": "bp_de_redirect", + "EventCode": "0x91", + "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 6221a840fcea..404d4c569c01 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -37,36 +37,31 @@ { "EventName": "ic_fetch_stall.ic_stall_any", "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", "UMask": "0x4" }, { "EventName": "ic_fetch_stall.ic_stall_dq_empty", "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", "UMask": "0x2" }, { "EventName": "ic_fetch_stall.ic_stall_back_pressure", "EventCode": "0x87", - "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", "UMask": "0x1" }, { "EventName": "ic_cache_inval.l2_invalidating_probe", "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", "UMask": "0x2" }, { "EventName": "ic_cache_inval.fill_invalidated", "EventCode": "0x8c", - "BriefDescription": "IC line invalidated due to overwriting fill response.", - "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", "UMask": "0x1" }, { @@ -77,211 +72,181 @@ { "EventName": "l2_request_g1.rd_blk_l", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", "UMask": "0x80" }, { "EventName": "l2_request_g1.rd_blk_x", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", "UMask": "0x40" }, { "EventName": "l2_request_g1.ls_rd_blk_c_s", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", "UMask": "0x20" }, { "EventName": "l2_request_g1.cacheable_ic_read", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", "UMask": "0x10" }, { "EventName": "l2_request_g1.change_to_x", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", "UMask": "0x8" }, { - "EventName": "l2_request_g1.prefetch_l2", + "EventName": "l2_request_g1.prefetch_l2_cmd", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", "UMask": "0x4" }, { "EventName": "l2_request_g1.l2_hw_pf", "EventCode": "0x60", - "BriefDescription": "Requests to L2 Group1.", - "PublicDescription": "Requests to L2 Group1.", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", "UMask": "0x2" }, { - "EventName": "l2_request_g1.other_requests", + "EventName": "l2_request_g1.group2", "EventCode": "0x60", - "BriefDescription": "Events covered by l2_request_g2.", - "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", "UMask": "0x1" }, { "EventName": "l2_request_g2.group1", "EventCode": "0x61", - "BriefDescription": "All Group 1 commands not in unit0.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", "UMask": "0x80" }, { "EventName": "l2_request_g2.ls_rd_sized", "EventCode": "0x61", - "BriefDescription": "RdSized, RdSized32, RdSized64.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", "UMask": "0x40" }, { "EventName": "l2_request_g2.ls_rd_sized_nc", "EventCode": "0x61", - "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", "UMask": "0x20" }, { "EventName": "l2_request_g2.ic_rd_sized", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", "UMask": "0x10" }, { "EventName": "l2_request_g2.ic_rd_sized_nc", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", "UMask": "0x8" }, { "EventName": "l2_request_g2.smc_inval", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", "UMask": "0x4" }, { "EventName": "l2_request_g2.bus_locks_originator", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", "UMask": "0x2" }, { "EventName": "l2_request_g2.bus_locks_responses", "EventCode": "0x61", - "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", - "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", "UMask": "0x1" }, { "EventName": "l2_latency.l2_cycles_waiting_on_fills", "EventCode": "0x62", "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", "UMask": "0x1" }, { "EventName": "l2_wcb_req.wcb_write", "EventCode": "0x63", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", - "BriefDescription": "LS to L2 WCB write requests.", + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", "UMask": "0x40" }, { "EventName": "l2_wcb_req.wcb_close", "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB close requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", "UMask": "0x20" }, { "EventName": "l2_wcb_req.zero_byte_store", "EventCode": "0x63", - "BriefDescription": "LS to L2 WCB zero byte store requests.", - "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", "UMask": "0x4" }, { "EventName": "l2_wcb_req.cl_zero", "EventCode": "0x63", - "PublicDescription": "LS to L2 WCB cache line zeroing requests.", - "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", "UMask": "0x1" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_cs", "EventCode": "0x64", - "BriefDescription": "LS ReadBlock C/S Hit.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", "UMask": "0x80" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", "EventCode": "0x64", - "BriefDescription": "LS Read Block L Hit X.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.", "UMask": "0x40" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", "EventCode": "0x64", - "BriefDescription": "LsRdBlkL Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.", "UMask": "0x20" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_x", "EventCode": "0x64", - "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", "UMask": "0x10" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_c", "EventCode": "0x64", - "BriefDescription": "LS Read Block C S L X Change to X Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", "UMask": "0x8" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_x", "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Exclusive Stale.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", "UMask": "0x4" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_s", "EventCode": "0x64", - "BriefDescription": "IC Fill Hit Shared.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", "UMask": "0x2" }, { "EventName": "l2_cache_req_stat.ic_fill_miss", "EventCode": "0x64", - "BriefDescription": "IC Fill Miss.", - "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", "UMask": "0x1" }, { "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", - "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", - "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", "UMask": "0x1" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json index 1079544eeed5..7e1aa8273935 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -62,7 +62,6 @@ "EventName": "ex_ret_brn_ind_misp", "EventCode": "0xca", "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", - "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." }, { "EventName": "ex_ret_mmx_fp_instr.sse_instr", @@ -90,11 +89,6 @@ "EventCode": "0xd1", "BriefDescription": "Retired Conditional Branch Instructions." }, - { - "EventName": "ex_ret_cond_misp", - "EventCode": "0xd2", - "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." - }, { "EventName": "ex_div_busy", "EventCode": "0xd3", @@ -108,22 +102,19 @@ { "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", "EventCode": "0x1cf", - "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", "UMask": "0x4" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS that retired.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", "UMask": "0x2" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", "EventCode": "0x1cf", - "BriefDescription": "Number of Ops tagged by IBS.", - "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", "UMask": "0x1" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json index ea4711983d1d..a35542bd3b36 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json @@ -2,17 +2,73 @@ { "EventName": "fpu_pipe_assignment.dual", "EventCode": "0x00", - "BriefDescription": "Total number multi-pipe uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", + "BriefDescription": "Total number multi-pipe uOps assigned to all pipes.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to all pipes.", "UMask": "0xf0" }, + { + "EventName": "fpu_pipe_assignment.dual3", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 3.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 3.", + "UMask": "0x80" + }, + { + "EventName": "fpu_pipe_assignment.dual2", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 2.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 2.", + "UMask": "0x40" + }, + { + "EventName": "fpu_pipe_assignment.dual1", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 1.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 1.", + "UMask": "0x20" + }, + { + "EventName": "fpu_pipe_assignment.dual0", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps assigned to pipe 0.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 0.", + "UMask": "0x10" + }, { "EventName": "fpu_pipe_assignment.total", "EventCode": "0x00", - "BriefDescription": "Total number uOps.", - "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", + "BriefDescription": "Total number uOps assigned to all fpu pipes.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.", "UMask": "0xf" }, + { + "EventName": "fpu_pipe_assignment.total3", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 3.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", + "UMask": "0x8" + }, + { + "EventName": "fpu_pipe_assignment.total2", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 2.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", + "UMask": "0x4" + }, + { + "EventName": "fpu_pipe_assignment.total1", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 1.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", + "UMask": "0x2" + }, + { + "EventName": "fpu_pipe_assignment.total0", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 0.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", + "UMask": "0x1" + }, { "EventName": "fp_sched_empty", "EventCode": "0x01", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json index fa2d60d4def0..b33a3c308019 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json @@ -3,28 +3,24 @@ "EventName": "ls_locks.bus_lock", "EventCode": "0x25", "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", "UMask": "0x1" }, { "EventName": "ls_dispatch.ld_st_dispatch", "EventCode": "0x29", - "BriefDescription": "Load-op-Stores.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", "UMask": "0x4" }, { "EventName": "ls_dispatch.store_dispatch", "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.", "UMask": "0x2" }, { "EventName": "ls_dispatch.ld_dispatch", "EventCode": "0x29", - "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.", "UMask": "0x1" }, { @@ -37,83 +33,114 @@ "EventCode": "0x40", "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." }, + { + "EventName": "ls_mab_alloc.dc_prefetcher", + "EventCode": "0x41", + "BriefDescription": "LS MAB allocates by type - DC prefetcher.", + "UMask": "0x8" + }, + { + "EventName": "ls_mab_alloc.stores", + "EventCode": "0x41", + "BriefDescription": "LS MAB allocates by type - stores.", + "UMask": "0x2" + }, + { + "EventName": "ls_mab_alloc.loads", + "EventCode": "0x41", + "BriefDescription": "LS MAB allocates by type - loads.", + "UMask": "0x01" + }, { "EventName": "ls_l1_d_tlb_miss.all", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", - "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", "UMask": "0xff" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 1G size.", - "PublicDescription": "L1 DTLB Miss of a page of 1G size.", "UMask": "0x80" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 2M size.", - "PublicDescription": "L1 DTLB Miss of a page of 2M size.", "UMask": "0x40" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 32K size.", - "PublicDescription": "L1 DTLB Miss of a page of 32K size.", "UMask": "0x20" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss of a page of 4K size.", - "PublicDescription": "L1 DTLB Miss of a page of 4K size.", "UMask": "0x10" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 1G size.", - "PublicDescription": "L1 DTLB Reload of a page of 1G size.", "UMask": "0x8" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 2M size.", - "PublicDescription": "L1 DTLB Reload of a page of 2M size.", "UMask": "0x4" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 32K size.", - "PublicDescription": "L1 DTLB Reload of a page of 32K size.", "UMask": "0x2" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 4K size.", - "PublicDescription": "L1 DTLB Reload of a page of 4K size.", "UMask": "0x1" }, { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", + "EventName": "ls_tablewalker.iside", "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", + "BriefDescription": "Total Page Table Walks on I-side.", "UMask": "0xc" }, { - "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", + "EventName": "ls_tablewalker.ic_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 1.", + "UMask": "0x8" + }, + { + "EventName": "ls_tablewalker.ic_type0", "EventCode": "0x46", - "BriefDescription": "Tablewalker allocation.", - "PublicDescription": "Tablewalker allocation.", + "BriefDescription": "Total Page Table Walks IC Type 0.", + "UMask": "0x4" + }, + { + "EventName": "ls_tablewalker.dside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on D-side.", "UMask": "0x3" }, + { + "EventName": "ls_tablewalker.dc_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 1.", + "UMask": "0x2" + }, + { + "EventName": "ls_tablewalker.dc_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 0.", + "UMask": "0x1" + }, { "EventName": "ls_misal_accesses", "EventCode": "0x47", @@ -123,35 +150,30 @@ "EventName": "ls_pref_instr_disp.prefetch_nta", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", "UMask": "0x4" }, { "EventName": "ls_pref_instr_disp.store_prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", "UMask": "0x2" }, { "EventName": "ls_pref_instr_disp.load_prefetch_w", "EventCode": "0x4b", - "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", - "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", + "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", "UMask": "0x1" }, { "EventName": "ls_inef_sw_pref.mab_mch_cnt", "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", "UMask": "0x2" }, { "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", "EventCode": "0x52", - "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", - "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", "UMask": "0x1" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json index b26a00d05a2e..ff780098d36e 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/other.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json @@ -2,64 +2,55 @@ { "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", "EventCode": "0x28a", - "BriefDescription": "OC to IC mode switch.", - "PublicDescription": "OC Mode Switch. OC to IC mode switch.", + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", "UMask": "0x2" }, { "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", "EventCode": "0x28a", - "BriefDescription": "IC to OC mode switch.", - "PublicDescription": "OC Mode Switch. IC to OC mode switch.", + "BriefDescription": "OC Mode Switch. IC to OC mode switch.", "UMask": "0x1" }, { "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", "EventCode": "0xaf", - "BriefDescription": "RETIRE Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", "UMask": "0x40" }, { "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", "EventCode": "0xaf", - "BriefDescription": "AGSQ Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", "UMask": "0x20" }, { "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALU tokens total unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", "UMask": "0x10" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", "EventCode": "0xaf", - "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.", "UMask": "0x8" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALSQ 3 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", "UMask": "0x4" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALSQ 2 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", "UMask": "0x2" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", "EventCode": "0xaf", - "BriefDescription": "ALSQ 1 Tokens unavailable.", - "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", "UMask": "0x1" } ] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 244a36e37a3a..25b06cf98747 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -36,5 +36,5 @@ GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core -AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v1,amdzen1,core +AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core -- cgit v1.2.3-70-g09d2 From 29f36c168813f1beaf59812cc79ef46fb33c669a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Thu, 19 Mar 2020 11:42:56 -0300 Subject: tools headers uapi: Update linux/in.h copy To get the changes in: 267762538705 ("seg6: fix SRv6 L2 tunnels to use IANA-assigned protocol number") That ends up automatically adding the new IPPROTO_ETHERNET to the socket args beautifiers: $ tools/perf/trace/beauty/socket_ipproto.sh > before Apply this patch: $ tools/perf/trace/beauty/socket_ipproto.sh > after $ diff -u before after --- before 2020-03-19 11:48:36.876673819 -0300 +++ after 2020-03-19 11:49:00.148541377 -0300 @@ -6,6 +6,7 @@ [132] = "SCTP", [136] = "UDPLITE", [137] = "MPLS", + [143] = "ETHERNET", [17] = "UDP", [1] = "ICMP", [22] = "IDP", $ Addresses this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: David S. Miller <davem@davemloft.net> Cc: Paolo Lungaroni <paolo.lungaroni@cnit.it> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 1521073b6348..8533bf07450f 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -74,6 +74,8 @@ enum { #define IPPROTO_UDPLITE IPPROTO_UDPLITE IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ #define IPPROTO_MPLS IPPROTO_MPLS + IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ +#define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MPTCP = 262, /* Multipath TCP connection */ -- cgit v1.2.3-70-g09d2 From 7cd053d4cf8a3dc1a06bdb4be0ed9b0ecbaa21f5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Wed, 18 Mar 2020 21:45:22 +0100 Subject: perf tools: Unify a bit the build directory output Removing the extra 'SUBDIR' line from clean and doc build output. Because it's annoying.. ;-) Before: $ make clean ... SUBDIR Documentation CLEAN Documentation After: $ make clean ... CLEAN Documentation Before: $ make doc BUILD: Doing 'make -j8' parallel build SUBDIR Documentation ASCIIDOC perf-stat.html ... After: $ make doc BUILD: Doing 'make -j8' parallel build ASCIIDOC perf-stat.html ... Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Michael Petlan <mpetlan@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200318204522.1200981-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Makefile.perf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3eda9d4b88e7..a02aca9b21f4 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -231,6 +231,7 @@ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ BPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ +DOC_DIR = $(srctree)/tools/perf/Documentation/ # Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. # Without this setting the output feature dump file misses some features, for @@ -792,7 +793,6 @@ $(LIBSUBCMD): FORCE $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a $(LIBSUBCMD)-clean: - $(call QUIET_CLEAN, libsubcmd) $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean help: @@ -832,7 +832,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html # 'make doc' should call 'make -C Documentation all' $(DOC_TARGETS): - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) TAG_FOLDERS= . ../lib ../include TAG_FILES= ../../include/uapi/linux/perf_event.h @@ -959,7 +959,7 @@ install-python_ext: # 'make install-doc' should call 'make -C Documentation install' $(INSTALL_DOC_TARGETS): - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) ### Cleaning rules @@ -1008,7 +1008,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(rename_flags_array) \ $(OUTPUT)$(arch_errno_name_array) \ $(OUTPUT)$(sync_file_range_arrays) - $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean + $(call QUIET_CLEAN, Documentation) \ + $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) -- cgit v1.2.3-70-g09d2 From c52db67a74b3a9b834af4588489cfea22ec280a3 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 17 Mar 2020 19:02:13 +0800 Subject: perf jevents: Add some test events Add some test PMU events. The events are randomly chosen from x86 and arm64 JSONs. The events include CPU and uncore events. Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: James Clark <james.clark@arm.com> Cc: Joakim Zhang <qiangqing.zhang@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- .../perf/pmu-events/arch/test/test_cpu/branch.json | 12 ++++++++++ .../perf/pmu-events/arch/test/test_cpu/other.json | 26 ++++++++++++++++++++++ .../perf/pmu-events/arch/test/test_cpu/uncore.json | 21 +++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/branch.json create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/other.json create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/uncore.json diff --git a/tools/perf/pmu-events/arch/test/test_cpu/branch.json b/tools/perf/pmu-events/arch/test/test_cpu/branch.json new file mode 100644 index 000000000000..93ddfd8053ca --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/branch.json @@ -0,0 +1,12 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 BTB Correction." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 BTB Correction." + } +] diff --git a/tools/perf/pmu-events/arch/test/test_cpu/other.json b/tools/perf/pmu-events/arch/test/test_cpu/other.json new file mode 100644 index 000000000000..7d53d7ecd723 --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/other.json @@ -0,0 +1,26 @@ +[ + { + "EventCode": "0x6", + "Counter": "0,1", + "UMask": "0x80", + "EventName": "SEGMENT_REG_LOADS.ANY", + "SampleAfterValue": "200000", + "BriefDescription": "Number of segment register loads." + }, + { + "EventCode": "0x9", + "Counter": "0,1", + "UMask": "0x20", + "EventName": "DISPATCH_BLOCKED.ANY", + "SampleAfterValue": "200000", + "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason" + }, + { + "EventCode": "0x3A", + "Counter": "0,1", + "UMask": "0x0", + "EventName": "EIST_TRANS", + "SampleAfterValue": "200000", + "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/test/test_cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json new file mode 100644 index 000000000000..d0a890cc814d --- /dev/null +++ b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json @@ -0,0 +1,21 @@ +[ + { + "EventCode": "0x02", + "EventName": "uncore_hisi_ddrc.flux_wcmd", + "BriefDescription": "DDRC write commands", + "PublicDescription": "DDRC write commands", + "Unit": "hisi_sccl,ddrc" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] -- cgit v1.2.3-70-g09d2 From d84478088780acdecfcf50a0e52b71cc4ab7c520 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 17 Mar 2020 19:02:14 +0800 Subject: perf jevents: Support test events folder With the goal of supporting pmu-events test case, introduce support for a test events folder. These test events can be used for testing generation of pmu-event tables and alias creation for any arch. When running the pmu-events test case, these test events will be used as the platform-agnostic events, so aliases can be created per-PMU and validated against known expected values. To support the test events, add a "testcpu" entry in pmu_events_map[]. The pmu-events test will be able to lookup the events map for "testcpu", to verify the generated tables against expected values. The resultant generated pmu-events.c will now look like the following: struct pmu_event pme_ampere_emag[] = { { .name = "ldrex_spec", .event = "event=0x6c", .desc = "Exclusive operation spe...", .topic = "intrinsic", .long_desc = "Exclusive operation ...", }, ... }; struct pmu_event pme_test_cpu[] = { { .name = "uncore_hisi_ddrc.flux_wcmd", .event = "event=0x2", .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ", .topic = "uncore", .long_desc = "DDRC write commands", .pmu = "hisi_sccl,ddrc", }, { .name = "unc_cbo_xsnp_response.miss_eviction", .event = "umask=0x81,event=0x22", .desc = "Unit: uncore_cbox A cross-core snoop resulted ...", .topic = "uncore", .long_desc = "A cross-core snoop resulted from L3 ...", .pmu = "uncore_cbox", }, { .name = "eist_trans", .event = "umask=0x0,period=200000,event=0x3a", .desc = "Number of Enhanced Intel SpeedStep(R) ...", .topic = "other", }, { .name = 0, }, }; struct pmu_events_map pmu_events_map[] = { ... { .cpuid = "0x00000000500f0000", .version = "v1", .type = "core", .table = pme_ampere_emag }, ... { .cpuid = "testcpu", .version = "v1", .type = "core", .table = pme_test_cpu, }, { .cpuid = 0, .version = 0, .type = 0, .table = 0, }, }; Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: James Clark <james.clark@arm.com> Cc: Joakim Zhang <qiangqing.zhang@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/pmu-events/jevents.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 3c4236a5bad8..fa86c5f997cc 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -771,6 +771,19 @@ static void print_mapping_table_suffix(FILE *outfp) fprintf(outfp, "};\n"); } +static void print_mapping_test_table(FILE *outfp) +{ + /* + * Print the terminating, NULL entry. + */ + fprintf(outfp, "{\n"); + fprintf(outfp, "\t.cpuid = \"testcpu\",\n"); + fprintf(outfp, "\t.version = \"v1\",\n"); + fprintf(outfp, "\t.type = \"core\",\n"); + fprintf(outfp, "\t.table = pme_test_cpu,\n"); + fprintf(outfp, "},\n"); +} + static int process_mapfile(FILE *outfp, char *fpath) { int n = 16384; @@ -848,6 +861,7 @@ static int process_mapfile(FILE *outfp, char *fpath) } out: + print_mapping_test_table(outfp); print_mapping_table_suffix(outfp); fclose(mapfp); free(line); @@ -1168,6 +1182,22 @@ int main(int argc, char *argv[]) goto empty_map; } + sprintf(ldirname, "%s/test", start_dirname); + + rc = nftw(ldirname, process_one_file, maxfds, 0); + if (rc && verbose) { + pr_info("%s: Error walking file tree %s rc=%d for test\n", + prog, ldirname, rc); + goto empty_map; + } else if (rc < 0) { + /* Make build fail */ + free_arch_std_events(); + ret = 1; + goto out_free_mapfile; + } else if (rc) { + goto empty_map; + } + if (close_table) print_events_table_suffix(eventsfp); -- cgit v1.2.3-70-g09d2 From e45ad701e784e0eed8a07b537b47afb302c59dab Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 17 Mar 2020 19:02:15 +0800 Subject: perf pmu: Refactor pmu_add_cpu_aliases() Create pmu_add_cpu_aliases_map() from pmu_add_cpu_aliases(), so the caller can pass the map; the pmu-events test would use this since there would be no CPUID matching to a mapfile there. Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: James Clark <james.clark@arm.com> Cc: Joakim Zhang <qiangqing.zhang@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/pmu.c | 21 +++++++++++++-------- tools/perf/util/pmu.h | 3 +++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8b99fd312aae..c616a06a34a8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -21,7 +21,6 @@ #include "pmu.h" #include "parse-events.h" #include "header.h" -#include "pmu-events/pmu-events.h" #include "string2.h" #include "strbuf.h" #include "fncache.h" @@ -744,16 +743,11 @@ out: * to the current running CPU. Then, add all PMU events from that table * as aliases. */ -static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, + struct pmu_events_map *map) { int i; - struct pmu_events_map *map; const char *name = pmu->name; - - map = perf_pmu__find_map(pmu); - if (!map) - return; - /* * Found a matching PMU events table. Create aliases */ @@ -788,6 +782,17 @@ new_alias: } } +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +{ + struct pmu_events_map *map; + + map = perf_pmu__find_map(pmu); + if (!map) + return; + + pmu_add_cpu_aliases_map(head, pmu, map); +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 6737e3d5d568..0b4a0efae38e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -7,6 +7,7 @@ #include <linux/perf_event.h> #include <stdbool.h> #include "parse-events.h" +#include "pmu-events/pmu-events.h" struct perf_evsel_config_term; @@ -97,6 +98,8 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); +void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, + struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); -- cgit v1.2.3-70-g09d2 From a6c925fd3aa2aba043a73b0ad57ffd296e1c42c9 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 17 Mar 2020 19:02:16 +0800 Subject: perf test: Add pmu-events test The initial test will verify that the test tables in generated pmu-events.c match against known, expected values. For known events added in pmu-events/arch/test, we need to add an entry in test_cpu_aliases_events[] or test_uncore_events[]. A sample run is as follows for x86: john@linux-3c19:~/linux> tools/perf/perf test -vv 10 10: PMU event aliases : --- start --- test child forked, pid 5316 testing event table bp_l1_btb_correct: pass testing event table bp_l2_btb_correct: pass testing event table segment_reg_loads.any: pass testing event table dispatch_blocked.any: pass testing event table eist_trans: pass testing event table uncore_hisi_ddrc.flux_wcmd: pass testing event table unc_cbo_xsnp_response.miss_eviction: pass test child finished with 0 ---- end ---- PMU event aliases: Ok Signed-off-by: John Garry <john.garry@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: James Clark <james.clark@arm.com> Cc: Joakim Zhang <qiangqing.zhang@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linuxarm@huawei.com [ Fixup test_cpu_events[] and test_uncore_events[] sentinels to initialize one of its members to NULL, fixing the build in older compilers ] Link: http://lore.kernel.org/lkml/1584442939-8911-5-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/pmu-events.c | 233 ++++++++++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 239 insertions(+) create mode 100644 tools/perf/tests/pmu-events.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1692529639b0..b3d1bf13ca07 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -14,6 +14,7 @@ perf-y += evsel-roundtrip-name.o perf-y += evsel-tp-sched.o perf-y += fdarray.o perf-y += pmu.o +perf-y += pmu-events.o perf-y += hists_common.o perf-y += hists_link.o perf-y += hists_filter.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 54d9516c9839..b6322eb0f423 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -72,6 +72,10 @@ static struct test generic_tests[] = { .desc = "Parse perf pmu format", .func = test__pmu, }, + { + .desc = "PMU events", + .func = test__pmu_events, + }, { .desc = "DSO data read", .func = test__dso_data, diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c new file mode 100644 index 000000000000..12fc29746a09 --- /dev/null +++ b/tools/perf/tests/pmu-events.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "parse-events.h" +#include "pmu.h" +#include "tests.h" +#include <errno.h> +#include <stdio.h> +#include <linux/kernel.h> + +#include "debug.h" +#include "../pmu-events/pmu-events.h" + +struct perf_pmu_test_event { + struct pmu_event event; +}; +static struct perf_pmu_test_event test_cpu_events[] = { + { + .event = { + .name = "bp_l1_btb_correct", + .event = "event=0x8a", + .desc = "L1 BTB Correction", + .topic = "branch", + }, + }, + { + .event = { + .name = "bp_l2_btb_correct", + .event = "event=0x8b", + .desc = "L2 BTB Correction", + .topic = "branch", + }, + }, + { + .event = { + .name = "segment_reg_loads.any", + .event = "umask=0x80,period=200000,event=0x6", + .desc = "Number of segment register loads", + .topic = "other", + }, + }, + { + .event = { + .name = "dispatch_blocked.any", + .event = "umask=0x20,period=200000,event=0x9", + .desc = "Memory cluster signals to block micro-op dispatch for any reason", + .topic = "other", + }, + }, + { + .event = { + .name = "eist_trans", + .event = "umask=0x0,period=200000,event=0x3a", + .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", + .topic = "other", + }, + }, + { /* sentinel */ + .event = { + .name = NULL, + }, + }, +}; + +static struct perf_pmu_test_event test_uncore_events[] = { + { + .event = { + .name = "uncore_hisi_ddrc.flux_wcmd", + .event = "event=0x2", + .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ", + .topic = "uncore", + .long_desc = "DDRC write commands", + .pmu = "hisi_sccl,ddrc", + }, + }, + { + .event = { + .name = "unc_cbo_xsnp_response.miss_eviction", + .event = "umask=0x81,event=0x22", + .desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core", + .topic = "uncore", + .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", + .pmu = "uncore_cbox", + }, + }, + { /* sentinel */ + .event = { + .name = NULL, + }, + } +}; + +const int total_test_events_size = ARRAY_SIZE(test_uncore_events); + +static bool is_same(const char *reference, const char *test) +{ + if (!reference && !test) + return true; + + if (reference && !test) + return false; + + if (!reference && test) + return false; + + return !strcmp(reference, test); +} + +static struct pmu_events_map *__test_pmu_get_events_map(void) +{ + struct pmu_events_map *map; + + for (map = &pmu_events_map[0]; map->cpuid; map++) { + if (!strcmp(map->cpuid, "testcpu")) + return map; + } + + pr_err("could not find test events map\n"); + + return NULL; +} + +/* Verify generated events from pmu-events.c is as expected */ +static int __test_pmu_event_table(void) +{ + struct pmu_events_map *map = __test_pmu_get_events_map(); + struct pmu_event *table; + int map_events = 0, expected_events; + + /* ignore 2x sentinels */ + expected_events = ARRAY_SIZE(test_cpu_events) + + ARRAY_SIZE(test_uncore_events) - 2; + + if (!map) + return -1; + + for (table = map->table; table->name; table++) { + struct perf_pmu_test_event *test; + struct pmu_event *te; + bool found = false; + + if (table->pmu) + test = &test_uncore_events[0]; + else + test = &test_cpu_events[0]; + + te = &test->event; + + for (; te->name; test++, te = &test->event) { + if (strcmp(table->name, te->name)) + continue; + found = true; + map_events++; + + if (!is_same(table->desc, te->desc)) { + pr_debug2("testing event table %s: mismatched desc, %s vs %s\n", + table->name, table->desc, te->desc); + return -1; + } + + if (!is_same(table->topic, te->topic)) { + pr_debug2("testing event table %s: mismatched topic, %s vs %s\n", + table->name, table->topic, + te->topic); + return -1; + } + + if (!is_same(table->long_desc, te->long_desc)) { + pr_debug2("testing event table %s: mismatched long_desc, %s vs %s\n", + table->name, table->long_desc, + te->long_desc); + return -1; + } + + if (!is_same(table->unit, te->unit)) { + pr_debug2("testing event table %s: mismatched unit, %s vs %s\n", + table->name, table->unit, + te->unit); + return -1; + } + + if (!is_same(table->perpkg, te->perpkg)) { + pr_debug2("testing event table %s: mismatched perpkg, %s vs %s\n", + table->name, table->perpkg, + te->perpkg); + return -1; + } + + if (!is_same(table->metric_expr, te->metric_expr)) { + pr_debug2("testing event table %s: mismatched metric_expr, %s vs %s\n", + table->name, table->metric_expr, + te->metric_expr); + return -1; + } + + if (!is_same(table->metric_name, te->metric_name)) { + pr_debug2("testing event table %s: mismatched metric_name, %s vs %s\n", + table->name, table->metric_name, + te->metric_name); + return -1; + } + + if (!is_same(table->deprecated, te->deprecated)) { + pr_debug2("testing event table %s: mismatched deprecated, %s vs %s\n", + table->name, table->deprecated, + te->deprecated); + return -1; + } + + pr_debug("testing event table %s: pass\n", table->name); + } + + if (!found) { + pr_err("testing event table: could not find event %s\n", + table->name); + return -1; + } + } + + if (map_events != expected_events) { + pr_err("testing event table: found %d, but expected %d\n", + map_events, expected_events); + return -1; + } + + return 0; +} +int test__pmu_events(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + if (__test_pmu_event_table()) + return -1; + + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 9a160fef47c9..61a1ab032080 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -49,6 +49,7 @@ int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest); int test__perf_evsel__tp_sched_test(struct test *test, int subtest); int test__syscall_openat_tp_fields(struct test *test, int subtest); int test__pmu(struct test *test, int subtest); +int test__pmu_events(struct test *test, int subtest); int test__attr(struct test *test, int subtest); int test__dso_data(struct test *test, int subtest); int test__dso_data_cache(struct test *test, int subtest); -- cgit v1.2.3-70-g09d2 From d504fae93dd61b734aefc403c7653d958aef655a Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 17 Mar 2020 19:02:17 +0800 Subject: perf pmu: Add is_pmu_core() Add a function to decide whether a PMU is a core PMU. Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: James Clark <james.clark@arm.com> Cc: Joakim Zhang <qiangqing.zhang@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/pmu.c | 5 +++++ tools/perf/util/pmu.h | 1 + 2 files changed, 6 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c616a06a34a8..55129d09f19d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1400,6 +1400,11 @@ static void wordwrap(char *s, int start, int max, int corr) } } +bool is_pmu_core(const char *name) +{ + return !strcmp(name, "cpu") || is_arm_pmu_core(name); +} + void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, bool long_desc, bool details_flag, bool deprecated) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 0b4a0efae38e..b756946ae78d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -88,6 +88,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); +bool is_pmu_core(const char *name); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag, bool deprecated); -- cgit v1.2.3-70-g09d2 From 5b9a50001b2c23f90f2a21db4763f3a9599588c4 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 17 Mar 2020 19:02:18 +0800 Subject: perf pmu: Make pmu_uncore_alias_match() public The perf pmu-events test will want to use pmu_uncore_alias_match(), so make it public. Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: James Clark <james.clark@arm.com> Cc: Joakim Zhang <qiangqing.zhang@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-7-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/pmu.c | 2 +- tools/perf/util/pmu.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 55129d09f19d..616fbda7c3fc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -698,7 +698,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } -static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +bool pmu_uncore_alias_match(const char *pmu_name, const char *name) { char *tmp = NULL, *tok, *str; bool res; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b756946ae78d..5fb3f16828df 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -103,6 +103,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +bool pmu_uncore_alias_match(const char *pmu_name, const char *name); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); -- cgit v1.2.3-70-g09d2 From 956a78356c24c1ac3becf854135dc065b6b74265 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 17 Mar 2020 19:02:19 +0800 Subject: perf test: Test pmu-events aliases Add creating event aliases to the pmu-events test. So currently we verify that the generated pmu-events.c is as expected for some test events. Now test that we generate aliases as expected for those events during normal operation. For that, we cycle through each HW PMU in the system, and use the test events to create aliases, and verify those against known, expected values. For core PMUs, we should create an alias for every event in test_cpu_events[]. However, for uncore PMUs, they need to be matched by the pmu_event.pmu member, so use test_uncore_events[]; so check the match beforehand with pmu_uncore_alias_match(). A sample run is as follows for my x86 machine: john@linux-3c19:~/linux> tools/perf/perf test -vv 10 10: PMU events : --- start --- ... testing PMU uncore_arb aliases: no events to match testing PMU cstate_pkg aliases: no events to match skipping testing PMU breakpoint testing aliases PMU uncore_cbox_1: matched event unc_cbo_xsnp_response.miss_eviction testing PMU uncore_cbox_1 aliases: pass testing PMU power aliases: no events to match testing aliases PMU cpu: matched event bp_l1_btb_correct testing aliases PMU cpu: matched event bp_l2_btb_correct testing aliases PMU cpu: matched event segment_reg_loads.any testing aliases PMU cpu: matched event dispatch_blocked.any testing aliases PMU cpu: matched event eist_trans testing PMU cpu aliases: pass testing PMU intel_pt aliases: no events to match skipping testing PMU software skipping testing PMU intel_bts testing PMU uncore_imc aliases: no events to match testing aliases PMU uncore_cbox_0: matched event unc_cbo_xsnp_response.miss_eviction testing PMU uncore_cbox_0 aliases: pass testing PMU cstate_core aliases: no events to match skipping testing PMU tracepoint testing PMU msr aliases: no events to match test child finished with 0 Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: James Clark <james.clark@arm.com> Cc: Joakim Zhang <qiangqing.zhang@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1584442939-8911-8-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/tests/pmu-events.c | 148 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 12fc29746a09..d64261da8bf7 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -5,13 +5,24 @@ #include <errno.h> #include <stdio.h> #include <linux/kernel.h> - +#include <linux/zalloc.h> #include "debug.h" #include "../pmu-events/pmu-events.h" struct perf_pmu_test_event { struct pmu_event event; + + /* extra events for aliases */ + const char *alias_str; + + /* + * Note: For when PublicDescription does not exist in the JSON, we + * will have no long_desc in pmu_event.long_desc, but long_desc may + * be set in the alias. + */ + const char *alias_long_desc; }; + static struct perf_pmu_test_event test_cpu_events[] = { { .event = { @@ -20,6 +31,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "L1 BTB Correction", .topic = "branch", }, + .alias_str = "event=0x8a", + .alias_long_desc = "L1 BTB Correction", }, { .event = { @@ -28,6 +41,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "L2 BTB Correction", .topic = "branch", }, + .alias_str = "event=0x8b", + .alias_long_desc = "L2 BTB Correction", }, { .event = { @@ -36,6 +51,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "Number of segment register loads", .topic = "other", }, + .alias_str = "umask=0x80,(null)=0x30d40,event=0x6", + .alias_long_desc = "Number of segment register loads", }, { .event = { @@ -44,6 +61,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "Memory cluster signals to block micro-op dispatch for any reason", .topic = "other", }, + .alias_str = "umask=0x20,(null)=0x30d40,event=0x9", + .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason", }, { .event = { @@ -52,6 +71,8 @@ static struct perf_pmu_test_event test_cpu_events[] = { .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", .topic = "other", }, + .alias_str = "umask=0,(null)=0x30d40,event=0x3a", + .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }, { /* sentinel */ .event = { @@ -70,6 +91,8 @@ static struct perf_pmu_test_event test_uncore_events[] = { .long_desc = "DDRC write commands", .pmu = "hisi_sccl,ddrc", }, + .alias_str = "event=0x2", + .alias_long_desc = "DDRC write commands", }, { .event = { @@ -80,6 +103,8 @@ static struct perf_pmu_test_event test_uncore_events[] = { .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", .pmu = "uncore_cbox", }, + .alias_str = "umask=0x81,event=0x22", + .alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", }, { /* sentinel */ .event = { @@ -223,11 +248,132 @@ static int __test_pmu_event_table(void) return 0; } + +static struct perf_pmu_alias *find_alias(const char *test_event, struct list_head *aliases) +{ + struct perf_pmu_alias *alias; + + list_for_each_entry(alias, aliases, list) + if (!strcmp(test_event, alias->name)) + return alias; + + return NULL; +} + +/* Verify aliases are as expected */ +static int __test__pmu_event_aliases(char *pmu_name, int *count) +{ + struct perf_pmu_test_event *test; + struct pmu_event *te; + struct perf_pmu *pmu; + LIST_HEAD(aliases); + int res = 0; + bool use_uncore_table; + struct pmu_events_map *map = __test_pmu_get_events_map(); + + if (!map) + return -1; + + if (is_pmu_core(pmu_name)) { + test = &test_cpu_events[0]; + use_uncore_table = false; + } else { + test = &test_uncore_events[0]; + use_uncore_table = true; + } + + pmu = zalloc(sizeof(*pmu)); + if (!pmu) + return -1; + + pmu->name = pmu_name; + + pmu_add_cpu_aliases_map(&aliases, pmu, map); + + for (te = &test->event; te->name; test++, te = &test->event) { + struct perf_pmu_alias *alias = find_alias(te->name, &aliases); + + if (!alias) { + bool uncore_match = pmu_uncore_alias_match(pmu_name, + te->pmu); + + if (use_uncore_table && !uncore_match) { + pr_debug3("testing aliases PMU %s: skip matching alias %s\n", + pmu_name, te->name); + continue; + } + + pr_debug2("testing aliases PMU %s: no alias, alias_table->name=%s\n", + pmu_name, te->name); + res = -1; + break; + } + + if (!is_same(alias->desc, te->desc)) { + pr_debug2("testing aliases PMU %s: mismatched desc, %s vs %s\n", + pmu_name, alias->desc, te->desc); + res = -1; + break; + } + + if (!is_same(alias->long_desc, test->alias_long_desc)) { + pr_debug2("testing aliases PMU %s: mismatched long_desc, %s vs %s\n", + pmu_name, alias->long_desc, + test->alias_long_desc); + res = -1; + break; + } + + if (!is_same(alias->str, test->alias_str)) { + pr_debug2("testing aliases PMU %s: mismatched str, %s vs %s\n", + pmu_name, alias->str, test->alias_str); + res = -1; + break; + } + + if (!is_same(alias->topic, te->topic)) { + pr_debug2("testing aliases PMU %s: mismatched topic, %s vs %s\n", + pmu_name, alias->topic, te->topic); + res = -1; + break; + } + + (*count)++; + pr_debug2("testing aliases PMU %s: matched event %s\n", + pmu_name, alias->name); + } + + free(pmu); + return res; +} + int test__pmu_events(struct test *test __maybe_unused, int subtest __maybe_unused) { + struct perf_pmu *pmu = NULL; + if (__test_pmu_event_table()) return -1; + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + int count = 0; + + if (list_empty(&pmu->format)) { + pr_debug2("skipping testing PMU %s\n", pmu->name); + continue; + } + + if (__test__pmu_event_aliases(pmu->name, &count)) { + pr_debug("testing PMU %s aliases: failed\n", pmu->name); + return -1; + } + + if (count == 0) + pr_debug3("testing PMU %s aliases: no events to match\n", + pmu->name); + else + pr_debug("testing PMU %s aliases: pass\n", pmu->name); + } + return 0; } -- cgit v1.2.3-70-g09d2 From d74b181a028bb5a468f0c609553eff6a8fdf4887 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Date: Tue, 24 Mar 2020 08:03:19 +0100 Subject: perf cpumap: Fix snprintf overflow check 'snprintf' returns the number of characters which would be generated for the given input. If the returned value is *greater than* or equal to the buffer size, it means that the output has been truncated. Fix the overflow test accordingly. Fixes: 7780c25bae59f ("perf tools: Allow ability to map cpus to nodes easily") Fixes: 92a7e1278005b ("perf cpumap: Add cpu__max_present_cpu()") Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Suggested-by: David Laight <David.Laight@ACULAB.COM> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Don Zickus <dzickus@redhat.com> Cc: He Zhe <zhe.he@windriver.com> Cc: Jan Stancek <jstancek@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: kernel-janitors@vger.kernel.org Link: http://lore.kernel.org/lkml/20200324070319.10901-1-christophe.jaillet@wanadoo.fr Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/cpumap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 983b7388f22b..dc5c5e6fc502 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -317,7 +317,7 @@ static void set_max_cpu_num(void) /* get the highest possible cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -328,7 +328,7 @@ static void set_max_cpu_num(void) /* get the highest present cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -356,7 +356,7 @@ static void set_max_node_num(void) /* get the highest possible cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt); - if (ret == PATH_MAX) { + if (ret >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); goto out; } @@ -441,7 +441,7 @@ int cpu__setup_cpunode_map(void) return 0; n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt); - if (n == PATH_MAX) { + if (n >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); return -1; } @@ -456,7 +456,7 @@ int cpu__setup_cpunode_map(void) continue; n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name); - if (n == PATH_MAX) { + if (n >= PATH_MAX) { pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); continue; } -- cgit v1.2.3-70-g09d2 From 0d33b34352531ff7029c58eda2321340c0ea3f5f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Date: Tue, 24 Mar 2020 09:54:24 +0530 Subject: perf dso: Fix dso comparison Perf gets dso details from two different sources. 1st, from builid headers in perf.data and 2nd from MMAP2 samples. Dso from buildid header does not have dso_id detail. And dso from MMAP2 samples does not have buildid information. If detail of the same dso is present at both the places, filename is common. Previously, __dsos__findnew_link_by_longname_id() used to compare only long or short names, but Commit 0e3149f86b99 ("perf dso: Move dso_id from 'struct map' to 'struct dso'") also added a dso_id comparison. Because of that, now perf is creating two different dso objects of the same file, one from buildid header (with dso_id but without buildid) and second from MMAP2 sample (with buildid but without dso_id). This is causing issues with archive, buildid-list etc subcommands. Fix this by comparing dso_id only when it's present. And incase dso is present in 'dsos' list without dso_id, inject dso_id detail as well. Before: $ sudo ./perf buildid-list -H 0000000000000000000000000000000000000000 /usr/bin/ls 0000000000000000000000000000000000000000 /usr/lib64/ld-2.30.so 0000000000000000000000000000000000000000 /usr/lib64/libc-2.30.so $ ./perf archive perf archive: no build-ids found After: $ ./perf buildid-list -H b6b1291d0cead046ed0fa5734037fa87a579adee /usr/bin/ls 641f0c90cfa15779352f12c0ec3c7a2b2b6f41e8 /usr/lib64/ld-2.30.so 675ace3ca07a0b863df01f461a7b0984c65c8b37 /usr/lib64/libc-2.30.so $ ./perf archive Now please run: $ tar xvf perf.data.tar.bz2 -C ~/.debug wherever you need to run 'perf report' on. Committer notes: Renamed is_empty_dso_id() to dso_id__empty() and inject_dso_id() to dso__inject_id() to keep namespacing consistent. Fixes: 0e3149f86b99 ("perf dso: Move dso_id from 'struct map' to 'struct dso'") Reported-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Link: http://lore.kernel.org/lkml/20200324042424.68366-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/dsos.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 591707c69c39..939471731ea6 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -26,13 +26,29 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) return 0; } +static bool dso_id__empty(struct dso_id *id) +{ + if (!id) + return true; + + return !id->maj && !id->min && !id->ino && !id->ino_generation; +} + +static void dso__inject_id(struct dso *dso, struct dso_id *id) +{ + dso->id.maj = id->maj; + dso->id.min = id->min; + dso->id.ino = id->ino; + dso->id.ino_generation = id->ino_generation; +} + static int dso_id__cmp(struct dso_id *a, struct dso_id *b) { /* * The second is always dso->id, so zeroes if not set, assume passing * NULL for a means a zeroed id */ - if (a == NULL) + if (dso_id__empty(a) || dso_id__empty(b)) return 0; return __dso_id__cmp(a, b); @@ -249,6 +265,10 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name) static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { struct dso *dso = __dsos__find_id(dsos, name, id, false); + + if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id)) + dso__inject_id(dso, id); + return dso ? dso : __dsos__addnew_id(dsos, name, id); } -- cgit v1.2.3-70-g09d2 From eadcaa3dfd706bbf46682c8b8b5979262443c3c3 Mon Sep 17 00:00:00 2001 From: Tony Jones <tonyj@suse.de> Date: Wed, 25 Mar 2020 09:40:53 -0700 Subject: perf callchain: Update docs regarding kernel/user space unwinding The method of unwinding for kernel space is defined by the kernel config, not by the value of --call-graph. Improve the documentation to reflect this. Signed-off-by: Tony Jones <tonyj@suse.de> Link: http://lore.kernel.org/lkml/20200325164053.10177-1-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-config.txt | 14 ++++++++------ tools/perf/Documentation/perf-record.txt | 18 ++++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 8ead55593984..f16d8a71d3f5 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -405,14 +405,16 @@ ui.*:: This option is only applied to TUI. call-graph.*:: - When sub-commands 'top' and 'report' work with -g/—-children - there're options in control of call-graph. + The following controls the handling of call-graphs (obtained via the + -g/--call-graph options). call-graph.record-mode:: - The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'. - The value of 'dwarf' is effective only if perf detect needed library - (libunwind or a recent version of libdw). - 'lbr' only work for cpus that support it. + The mode for user space can be 'fp' (frame pointer), 'dwarf' + and 'lbr'. The value 'dwarf' is effective only if libunwind + (or a recent version of libdw) is present on the system; + the value 'lbr' only works for certain cpus. The method for + kernel space is controlled not by this option but by the + kernel config (CONFIG_UNWINDER_*). call-graph.dump-size:: The size of stack to dump in order to do post-unwinding. Default is 8192 (byte). diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7f4db7592467..b25e028458e2 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -237,16 +237,22 @@ OPTIONS option and remains only for backward compatibility. See --event. -g:: - Enables call-graph (stack chain/backtrace) recording. + Enables call-graph (stack chain/backtrace) recording for both + kernel space and user space. --call-graph:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -g. Default is "fp". + implies -g. Default is "fp" (for user space). - Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) or "lbr" - (Hardware Last Branch Record facility) as the method to collect - the information used to show the call graphs. + The unwinding method used for kernel space is dependent on the + unwinder used by the active kernel configuration, i.e + CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc) + + Any option specified here controls the method used for user space. + + Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI - + Call Frame Information) or "lbr" (Hardware Last Branch Record + facility). In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus -- cgit v1.2.3-70-g09d2 From 2a3d252dffe14582f238e21b09923e3772263123 Mon Sep 17 00:00:00 2001 From: Ian Rogers <irogers@google.com> Date: Wed, 25 Mar 2020 09:40:22 -0700 Subject: perf parse-events: Add defensive NULL check Terms may have a NULL config in which case a strcmp will SEGV. This can be reproduced with: perf stat -e '*/event=?,nr/' sleep 1 Add a NULL check to avoid this. This was caught by LLVM's libfuzzer. Signed-off-by: Ian Rogers <irogers@google.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jin Yao <yao.jin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: John Garry <john.garry@huawei.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200325164022.41385-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/pmu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 616fbda7c3fc..ef6a63f3d386 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -984,12 +984,11 @@ static int pmu_resolve_param_term(struct parse_events_term *term, struct parse_events_term *t; list_for_each_entry(t, head_terms, list) { - if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { - if (!strcmp(t->config, term->config)) { - t->used = true; - *value = t->val.num; - return 0; - } + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM && + t->config && !strcmp(t->config, term->config)) { + t->used = true; + *value = t->val.num; + return 0; } } -- cgit v1.2.3-70-g09d2 From e4ffd066ff440a57097e9140fa9e16ceef905de8 Mon Sep 17 00:00:00 2001 From: He Zhe <zhe.he@windriver.com> Date: Fri, 14 Feb 2020 02:21:06 +0800 Subject: perf: Normalize gcc parameter when generating arch errno table The $(CC) passed to arch_errno_names.sh may include a series of parameters along with gcc itself. To avoid overwriting the following parameters of arch_errno_names.sh and break the build like below, we just pick up the first word of the $(CC). find: unknown predicate `-m64/arch' x86_64-wrs-linux-gcc: warning: '-x c' after last input file has no effect x86_64-wrs-linux-gcc: error: unrecognized command line option '-m64/include/uapi/asm-generic/errno.h' x86_64-wrs-linux-gcc: fatal error: no input files Signed-off-by: He Zhe <zhe.he@windriver.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/1581618066-187262-2-git-send-email-zhe.he@windriver.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a02aca9b21f4..d15a311408f1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -574,7 +574,7 @@ arch_errno_hdr_dir := $(srctree)/tools arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh $(arch_errno_name_array): $(arch_errno_tbl) - $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@ + $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@ sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh -- cgit v1.2.3-70-g09d2 From 315a4af8cd2469fcf5dacc58b5e73944b4f0c561 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu <yu-cheng.yu@intel.com> Date: Tue, 4 Feb 2020 09:14:24 -0800 Subject: x86/insn: Add Control-flow Enforcement (CET) instructions to the opcode map Add the following CET instructions to the opcode map: INCSSP: Increment Shadow Stack pointer (SSP). RDSSP: Read SSP into a GPR. SAVEPREVSSP: Use "previous ssp" token at top of current Shadow Stack (SHSTK) to create a "restore token" on the previous (outgoing) SHSTK. RSTORSSP: Restore from a "restore token" to SSP. WRSS: Write to kernel-mode SHSTK (kernel-mode instruction). WRUSS: Write to user-mode SHSTK (kernel-mode instruction). SETSSBSY: Verify the "supervisor token" pointed by MSR_IA32_PL0_SSP, set the token busy, and set then Shadow Stack pointer(SSP) to the value of MSR_IA32_PL0_SSP. CLRSSBSY: Verify the "supervisor token" and clear its busy bit. ENDBR64/ENDBR32: Mark a valid 64/32 bit control transfer endpoint. Detailed information of CET instructions can be found in Intel Software Developer's Manual. Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com> Reviewed-by: Adrian Hunter <adrian.hunter@intel.com> Reviewed-by: Tony Luck <tony.luck@intel.com> Acked-by: Masami Hiramatsu <mhiramat@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi v. Shankar <ravi.v.shankar@intel.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20200204171425.28073-2-yu-cheng.yu@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- arch/x86/lib/x86-opcode-map.txt | 17 +++++++++++------ tools/arch/x86/lib/x86-opcode-map.txt | 17 +++++++++++------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 53adc1762ec0..ec31f5b60323 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -366,7 +366,7 @@ AVXcode: 1 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: Grp20 (1A),(1C) 1d: -1e: +1e: Grp21 (1A) 1f: NOP Ev # 0x0f 0x20-0x2f 20: MOV Rd,Cd @@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) f3: Grp17 (1A) -f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) f9: MOVDIRI My,Gy @@ -970,7 +970,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -1041,8 +1041,8 @@ GrpTable: Grp15 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) -5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) +5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1077,6 +1077,11 @@ GrpTable: Grp20 0: cldemote Mb EndTable +GrpTable: Grp21 +1: RDSSPD/Q Ry (F3),(11B) +7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index 53adc1762ec0..ec31f5b60323 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -366,7 +366,7 @@ AVXcode: 1 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: Grp20 (1A),(1C) 1d: -1e: +1e: Grp21 (1A) 1f: NOP Ev # 0x0f 0x20-0x2f 20: MOV Rd,Cd @@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) f2: ANDN Gy,By,Ey (v) f3: Grp17 (1A) -f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) -f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) +f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) f9: MOVDIRI My,Gy @@ -970,7 +970,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -1041,8 +1041,8 @@ GrpTable: Grp15 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) -5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) +5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1077,6 +1077,11 @@ GrpTable: Grp20 0: cldemote Mb EndTable +GrpTable: Grp21 +1: RDSSPD/Q Ry (F3),(11B) +7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH -- cgit v1.2.3-70-g09d2 From 26cec7480e7edaa9dead4912055078b98c1d192e Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Tue, 4 Feb 2020 09:14:25 -0800 Subject: perf test x86: Add CET instructions to the new instructions test Add to the "x86 instruction decoder - new instructions" test the following instructions: incsspd incsspq rdsspd rdsspq saveprevssp rstorssp wrssd wrssq wrussd wrussq setssbsy clrssbsy endbr32 endbr64 And the "notrack" prefix for indirect calls and jumps. For information about the instructions, refer Intel Control-flow Enforcement Technology Specification May 2019 (334525-003). Committer testing: $ perf test instr 67: x86 instruction decoder - new instructions : Ok $ Then use verbose mode and check one of those new instructions: $ perf test -v instr |& grep saveprevssp Decoded ok: f3 0f 01 ea saveprevssp Decoded ok: f3 0f 01 ea saveprevssp $ Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Acked-by: Masami Hiramatsu <mhiramat@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi v. Shankar <ravi.v.shankar@intel.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20200204171425.28073-3-yu-cheng.yu@intel.com Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/arch/x86/tests/insn-x86-dat-32.c | 112 +++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-64.c | 196 ++++++++++++++++++++++ tools/perf/arch/x86/tests/insn-x86-dat-src.c | 236 +++++++++++++++++++++++++++ 3 files changed, 544 insertions(+) diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index e6461abc9e7b..9708ae892061 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2085,6 +2085,118 @@ "67 f3 0f 38 f8 1c \tenqcmds (%si),%bx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "", "67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%eax)",}, +{{0x0f, 0xae, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f ae 2d 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%eax)",}, +{{0xf3, 0x0f, 0x01, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 01 2d 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%eax)",}, +{{0x0f, 0x38, 0xf6, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 f6 15 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%eax)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 38 f5 15 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%eax)",}, +{{0xf3, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f ae 35 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcall *%eax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcall *(%eax)",}, +{{0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "call", "indirect", +"ff 15 78 56 34 12 \tcall *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcall *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd call *%eax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd call *(%eax)",}, +{{0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"f2 ff 15 78 56 34 12 \tbnd call *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack call *%eax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack call *(%eax)",}, +{{0x3e, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"3e ff 15 78 56 34 12 \tnotrack call *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd call *%eax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd call *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e f2 ff 15 78 56 34 12 \tnotrack bnd call *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd call *0x12345678(%eax,%ecx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmp *%eax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmp *(%eax)",}, +{{0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "jmp", "indirect", +"ff 25 78 56 34 12 \tjmp *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmp *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmp *%eax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmp *(%eax)",}, +{{0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"f2 ff 25 78 56 34 12 \tbnd jmp *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmp *%eax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmp *(%eax)",}, +{{0x3e, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"3e ff 25 78 56 34 12 \tnotrack jmp *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmp *%eax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmp *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 567ecccfad7c..5da17d41d302 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2263,6 +2263,202 @@ "67 f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", "67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 41 0f ae e8 \tincsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 48 0f ae e8 \tincsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 49 0f ae e8 \tincsspq %r8",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%rax)",}, +{{0x41, 0x0f, 0xae, 0x28, }, 4, 0, "", "", +"41 0f ae 28 \txrstor (%r8)",}, +{{0x0f, 0xae, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae 2c 25 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 41 0f 1e c8 \trdsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 48 0f 1e c8 \trdsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 49 0f 1e c8 \trdsspq %r8",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%rax)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0x28, }, 5, 0, "", "", +"f3 41 0f 01 28 \trstorssp (%r8)",}, +{{0xf3, 0x0f, 0x01, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 2c 25 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%rax)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"41 0f 38 f6 10 \twrssd %edx,(%r8)",}, +{{0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 14 25 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"41 0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x08, }, 5, 0, "", "", +"48 0f 38 f6 08 \twrssq %rcx,(%rax)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"49 0f 38 f6 10 \twrssq %rdx,(%r8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 14 25 78 56 34 12 \twrssq %rdx,0x12345678",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"49 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%rax)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 41 0f 38 f5 10 \twrussd %edx,(%r8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 14 25 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 41 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x08, }, 6, 0, "", "", +"66 48 0f 38 f5 08 \twrussq %rcx,(%rax)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 49 0f 38 f5 10 \twrussq %rdx,(%r8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 14 25 78 56 34 12 \twrussq %rdx,0x12345678",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 49 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%rax)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "", +"f3 41 0f ae 30 \tclrssbsy (%r8)",}, +{{0xf3, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae 34 25 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcallq *%rax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcallq *(%rax)",}, +{{0x41, 0xff, 0x10, }, 3, 0, "call", "indirect", +"41 ff 10 \tcallq *(%r8)",}, +{{0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 14 25 78 56 34 12 \tcallq *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcallq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"41 ff 94 c8 78 56 34 12 \tcallq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd callq *%rax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd callq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"f2 41 ff 10 \tbnd callq *(%r8)",}, +{{0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 14 25 78 56 34 12 \tbnd callq *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"f2 41 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack callq *%rax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack callq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e 41 ff 10 \tnotrack callq *(%r8)",}, +{{0x3e, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 14 25 78 56 34 12 \tnotrack callq *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e 41 ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd callq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd callq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x10, }, 5, 0, "call", "indirect", +"3e f2 41 ff 10 \tnotrack bnd callq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 14 25 78 56 34 12 \tnotrack bnd callq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "call", "indirect", +"3e f2 41 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%r8,%rcx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmpq *%rax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmpq *(%rax)",}, +{{0x41, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"41 ff 20 \tjmpq *(%r8)",}, +{{0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff 24 25 78 56 34 12 \tjmpq *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"41 ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmpq *%rax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmpq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"f2 41 ff 20 \tbnd jmpq *(%r8)",}, +{{0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff 24 25 78 56 34 12 \tbnd jmpq *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"f2 41 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmpq *%rax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmpq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e 41 ff 20 \tnotrack jmpq *(%r8)",}, +{{0x3e, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff 24 25 78 56 34 12 \tnotrack jmpq *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e 41 ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmpq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmpq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x20, }, 5, 0, "jmp", "indirect", +"3e f2 41 ff 20 \tnotrack bnd jmpq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff 24 25 78 56 34 12 \tnotrack bnd jmpq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect", +"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index ddbf07c50bb8..c3808e94c46e 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1771,6 +1771,145 @@ int main(void) asm volatile("enqcmds (%eax),%ebx"); asm volatile("enqcmds 0x12345678(%eax),%ecx"); + /* incsspd/q */ + + asm volatile("incsspd %eax"); + asm volatile("incsspd %r8d"); + asm volatile("incsspq %rax"); + asm volatile("incsspq %r8"); + /* Also check instructions in the same group encoding as incsspd/q */ + asm volatile("xrstor (%rax)"); + asm volatile("xrstor (%r8)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%rax,%rcx,8)"); + asm volatile("xrstor 0x12345678(%r8,%rcx,8)"); + asm volatile("lfence"); + + /* rdsspd/q */ + + asm volatile("rdsspd %eax"); + asm volatile("rdsspd %r8d"); + asm volatile("rdsspq %rax"); + asm volatile("rdsspq %r8"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%rax)"); + asm volatile("rstorssp (%r8)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%rax,%rcx,8)"); + asm volatile("rstorssp 0x12345678(%r8,%rcx,8)"); + + /* wrssd/q */ + + asm volatile("wrssd %ecx,(%rax)"); + asm volatile("wrssd %edx,(%r8)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrssq %rcx,(%rax)"); + asm volatile("wrssq %rdx,(%r8)"); + asm volatile("wrssq %rdx,(0x12345678)"); + asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)"); + + /* wrussd/q */ + + asm volatile("wrussd %ecx,(%rax)"); + asm volatile("wrussd %edx,(%r8)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrussq %rcx,(%rax)"); + asm volatile("wrussq %rdx,(%r8)"); + asm volatile("wrussq %rdx,(0x12345678)"); + asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%rax)"); + asm volatile("clrssbsy (%r8)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)"); + asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3434,6 +3573,103 @@ int main(void) asm volatile("enqcmds (%si),%bx"); asm volatile("enqcmds 0x1234(%si),%cx"); + /* incsspd */ + + asm volatile("incsspd %eax"); + /* Also check instructions in the same group encoding as incsspd */ + asm volatile("xrstor (%eax)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%eax,%ecx,8)"); + asm volatile("lfence"); + + /* rdsspd */ + + asm volatile("rdsspd %eax"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%eax)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%eax,%ecx,8)"); + + /* wrssd */ + + asm volatile("wrssd %ecx,(%eax)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)"); + + /* wrussd */ + + asm volatile("wrussd %ecx,(%eax)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%eax)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + #endif /* #ifndef __x86_64__ */ /* SGX */ -- cgit v1.2.3-70-g09d2 From 26567ed79d13ec54b2c5661ce8a07fd149a23a9b Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer <hagen@jauu.net> Date: Tue, 4 Feb 2020 18:37:09 +0100 Subject: perf script: Introduce --deltatime option For some kind of analysis a deltatime output is more human friendly and reduce the cognitive load for further analysis. The following output demonstrate the new option "deltatime": calculate the time difference in relation to the previous event. $ perf script --deltatime test 2525 [001] 0.000000: sdt_libev:ev_add: (5635e72a5ebd) test 2525 [001] 0.000091: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 1.000051: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000685: sdt_libev:ev_add: (5635e72a5ebd) test 2525 [001] 0.000048: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 1.000104: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.003895: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 0.996034: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000058: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 1.000004: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000064: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 0.999934: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 test 2525 [001] 0.000056: sdt_libev:epoll_wait_enter: (5635e72a76a9) test 2525 [001] 0.999930: sdt_libev:epoll_wait_return: (5635e72a772e) arg1=1 Committer testing: So go from default output to --reltime and then this new --deltatime, to contrast the various timestamp presentation modes for a random perf.data file I had laying around: [root@five ~]# perf script --reltime | head perf 442394 [000] 0.000000: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000002: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000004: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000006: 128 cycles: ffffffff972415a1 perf_event_update_userpage+0x1 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000009: 2597 cycles: ffffffff97463785 cap_task_setscheduler+0x5 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000036: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000038: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000040: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000041: 224 cycles: ffffffff9700a53a perf_ibs_handle_irq+0x1da (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000044: 4439 cycles: ffffffff97120d85 put_prev_entity+0x45 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) [root@five ~]# perf script --deltatime | head perf 442394 [000] 0.000000: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000002: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000001: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000001: 128 cycles: ffffffff972415a1 perf_event_update_userpage+0x1 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 0.000002: 2597 cycles: ffffffff97463785 cap_task_setscheduler+0x5 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000027: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000002: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000001: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000001: 224 cycles: ffffffff9700a53a perf_ibs_handle_irq+0x1da (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 0.000002: 4439 cycles: ffffffff97120d85 put_prev_entity+0x45 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) [root@five ~]# perf script | head perf 442394 [000] 7600.157861: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157864: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157866: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157867: 128 cycles: ffffffff972415a1 perf_event_update_userpage+0x1 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [000] 7600.157870: 2597 cycles: ffffffff97463785 cap_task_setscheduler+0x5 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157897: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157900: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157901: 16 cycles: ffffffff9706e544 native_write_msr+0x4 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157903: 224 cycles: ffffffff9700a53a perf_ibs_handle_irq+0x1da (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) perf 442394 [001] 7600.157906: 4439 cycles: ffffffff97120d85 put_prev_entity+0x45 (/usr/lib/debug/lib/modules/5.5.10-200.fc31.x86_64/vmlinux) [root@five ~]# Andi suggested we better implement it as a new field, i.e. -F deltatime, like: [root@five ~]# perf script -F deltatime Invalid field requested. Usage: perf script [<options>] or: perf script [<options>] record <script> [<record-options>] <command> or: perf script [<options>] report <script> [script-args] or: perf script [<options>] <script> [<record-options>] <command> or: perf script [<options>] <top-script> [script-args] -F, --fields <str> comma separated output fields prepend with 'type:'. +field to add and -field to remove.Valid types: hw,sw,trace,raw,synth. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,srcline,period,iregs,uregs,brstack,brstacksym,flags,bpf-output,brstackinsn,brstackoff,callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc [root@five ~]# I.e. we have -F for maximum flexibility: [root@five ~]# perf script -F comm,pid,cpu,time | head perf 442394 [000] 7600.157861: perf 442394 [000] 7600.157864: perf 442394 [000] 7600.157866: perf 442394 [000] 7600.157867: perf 442394 [000] 7600.157870: perf 442394 [001] 7600.157897: perf 442394 [001] 7600.157900: perf 442394 [001] 7600.157901: perf 442394 [001] 7600.157903: perf 442394 [001] 7600.157906: [root@five ~]# But since we already have --reltime, having --deltatime, documented one after the other is sensible. Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Link: http://lore.kernel.org/lkml/20200204173709.489161-1-hagen@jauu.net [ Added 'perf script' man page entry for --deltatime ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index db6a36aac47e..4af255bb0977 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -390,6 +390,9 @@ include::itrace.txt[] --reltime:: Print time stamps relative to trace start. +--deltatime:: + Print time stamps relative to previous event. + --per-event-dump:: Create per event files with a "perf.data.EVENT.dump" name instead of printing to stdout, useful, for instance, for generating flamegraphs. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 656b347f6dd8..f63869aef914 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -63,7 +63,9 @@ static char const *script_name; static char const *generate_script_lang; static bool reltime; +static bool deltatime; static u64 initial_time; +static u64 previous_time; static bool debug_mode; static u64 last_timestamp; static u64 nr_unordered; @@ -704,6 +706,13 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, if (!initial_time) initial_time = sample->time; t = sample->time - initial_time; + } else if (deltatime) { + if (previous_time) + t = sample->time - previous_time; + else { + t = 0; + } + previous_time = sample->time; } nsecs = t; secs = nsecs / NSEC_PER_SEC; @@ -3555,6 +3564,7 @@ int cmd_script(int argc, const char **argv) "anything beyond the specified depth will be ignored. " "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"), + OPT_BOOLEAN(0, "deltatime", &deltatime, "Show time stamps relative to previous event"), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -3651,6 +3661,13 @@ int cmd_script(int argc, const char **argv) } } + if (reltime && deltatime) { + fprintf(stderr, + "reltime and deltatime - the two don't get along well. " + "Please limit to --reltime or --deltatime.\n"); + return -1; + } + if (itrace_synth_opts.callchain && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; -- cgit v1.2.3-70-g09d2 From 96aaab686505c449e24d76e76507290dcc30e008 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:28 +0900 Subject: perf/core: Add PERF_RECORD_CGROUP event To support cgroup tracking, add CGROUP event to save a link between cgroup path and id number. This is needed since cgroups can go away when userspace tries to read the cgroup info (from the id) later. The attr.cgroup bit was also added to enable cgroup tracking from userspace. This event will be generated when a new cgroup becomes active. Userspace might need to synthesize those events for existing cgroups. Committer testing: From the resulting kernel, using /sys/kernel/btf/vmlinux: $ pahole perf_event_attr | grep -w cgroup -B5 -A1 __u64 write_backward:1; /* 40:27 8 */ __u64 namespaces:1; /* 40:28 8 */ __u64 ksymbol:1; /* 40:29 8 */ __u64 bpf_event:1; /* 40:30 8 */ __u64 aux_output:1; /* 40:31 8 */ __u64 cgroup:1; /* 40:32 8 */ __u64 __reserved_1:31; /* 40:33 8 */ $ Reported-by: kbuild test robot <lkp@intel.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Tejun Heo <tj@kernel.org> [staticize perf_event_cgroup function] Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Zefan Li <lizefan@huawei.com> Link: http://lore.kernel.org/lkml/20200325124536.2800725-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- include/uapi/linux/perf_event.h | 13 ++++- kernel/events/core.c | 111 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 397cfd65b3fe..de95f6c7b273 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -381,7 +381,8 @@ struct perf_event_attr { ksymbol : 1, /* include ksymbol events */ bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ - __reserved_1 : 32; + cgroup : 1, /* include cgroup events */ + __reserved_1 : 31; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1012,6 +1013,16 @@ enum perf_event_type { */ PERF_RECORD_BPF_EVENT = 18, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * char path[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CGROUP = 19, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index d22e4ba59dfa..994932d5e474 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -387,6 +387,7 @@ static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; static atomic_t nr_ksymbol_events __read_mostly; static atomic_t nr_bpf_events __read_mostly; +static atomic_t nr_cgroup_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4608,6 +4609,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.namespaces) atomic_dec(&nr_namespaces_events); + if (event->attr.cgroup) + atomic_dec(&nr_cgroup_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -7735,6 +7738,105 @@ void perf_event_namespaces(struct task_struct *task) NULL); } +/* + * cgroup tracking + */ +#ifdef CONFIG_CGROUP_PERF + +struct perf_cgroup_event { + char *path; + int path_size; + struct { + struct perf_event_header header; + u64 id; + char path[]; + } event_id; +}; + +static int perf_event_cgroup_match(struct perf_event *event) +{ + return event->attr.cgroup; +} + +static void perf_event_cgroup_output(struct perf_event *event, void *data) +{ + struct perf_cgroup_event *cgroup_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + u16 header_size = cgroup_event->event_id.header.size; + int ret; + + if (!perf_event_cgroup_match(event)) + return; + + perf_event_header__init_id(&cgroup_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + cgroup_event->event_id.header.size); + if (ret) + goto out; + + perf_output_put(&handle, cgroup_event->event_id); + __output_copy(&handle, cgroup_event->path, cgroup_event->path_size); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +out: + cgroup_event->event_id.header.size = header_size; +} + +static void perf_event_cgroup(struct cgroup *cgrp) +{ + struct perf_cgroup_event cgroup_event; + char path_enomem[16] = "//enomem"; + char *pathname; + size_t size; + + if (!atomic_read(&nr_cgroup_events)) + return; + + cgroup_event = (struct perf_cgroup_event){ + .event_id = { + .header = { + .type = PERF_RECORD_CGROUP, + .misc = 0, + .size = sizeof(cgroup_event.event_id), + }, + .id = cgroup_id(cgrp), + }, + }; + + pathname = kmalloc(PATH_MAX, GFP_KERNEL); + if (pathname == NULL) { + cgroup_event.path = path_enomem; + } else { + /* just to be sure to have enough space for alignment */ + cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64)); + cgroup_event.path = pathname; + } + + /* + * Since our buffer works in 8 byte units we need to align our string + * size to a multiple of 8. However, we must guarantee the tail end is + * zero'd out to avoid leaking random bits to userspace. + */ + size = strlen(cgroup_event.path) + 1; + while (!IS_ALIGNED(size, sizeof(u64))) + cgroup_event.path[size++] = '\0'; + + cgroup_event.event_id.header.size += size; + cgroup_event.path_size = size; + + perf_iterate_sb(perf_event_cgroup_output, + &cgroup_event, + NULL); + + kfree(pathname); +} + +#endif + /* * mmap tracking */ @@ -10781,6 +10883,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_comm_events); if (event->attr.namespaces) atomic_inc(&nr_namespaces_events); + if (event->attr.cgroup) + atomic_inc(&nr_cgroup_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) @@ -12757,6 +12861,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css) kfree(jc); } +static int perf_cgroup_css_online(struct cgroup_subsys_state *css) +{ + perf_event_cgroup(css->cgroup); + return 0; +} + static int __perf_cgroup_move(void *info) { struct task_struct *task = info; @@ -12778,6 +12888,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset) struct cgroup_subsys perf_event_cgrp_subsys = { .css_alloc = perf_cgroup_css_alloc, .css_free = perf_cgroup_css_free, + .css_online = perf_cgroup_css_online, .attach = perf_cgroup_attach, /* * Implicitly enable on dfl hierarchy so that perf events can -- cgit v1.2.3-70-g09d2 From 6546b19f95acc986807de981402bbac6b3a94b0f Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:29 +0900 Subject: perf/core: Add PERF_SAMPLE_CGROUP feature The PERF_SAMPLE_CGROUP bit is to save (perf_event) cgroup information in the sample. It will add a 64-bit id to identify current cgroup and it's the file handle in the cgroup file system. Userspace should use this information with PERF_RECORD_CGROUP event to match which cgroup it belongs. I put it before PERF_SAMPLE_AUX for simplicity since it just needs a 64-bit word. But if we want bigger samples, I can work on that direction too. Committer testing: $ pahole perf_sample_data | grep -w cgroup -B5 -A5 /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */ struct perf_regs regs_intr; /* 312 16 */ /* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */ u64 stack_user_size; /* 328 8 */ u64 phys_addr; /* 336 8 */ u64 cgroup; /* 344 8 */ /* size: 384, cachelines: 6, members: 22 */ /* padding: 32 */ }; $ Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Tejun Heo <tj@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Zefan Li <lizefan@huawei.com> Link: http://lore.kernel.org/lkml/20200325124536.2800725-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 3 ++- init/Kconfig | 3 ++- kernel/events/core.c | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8768a39b5258..9c3e7619c929 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1020,6 +1020,7 @@ struct perf_sample_data { u64 stack_user_size; u64 phys_addr; + u64 cgroup; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index de95f6c7b273..7b2d6fc9e6ed 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -142,8 +142,9 @@ enum perf_event_sample_format { PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, + PERF_SAMPLE_CGROUP = 1U << 21, - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; diff --git a/init/Kconfig b/init/Kconfig index 20a6ac33761c..7766b06a0038 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1027,7 +1027,8 @@ config CGROUP_PERF help This option extends the perf per-cpu mode to restrict monitoring to threads which belong to the cgroup specified and run on the - designated cpu. + designated cpu. Or this can be used to have cgroup ID in samples + so that it can monitor performance events among cgroups. Say N if unsure. diff --git a/kernel/events/core.c b/kernel/events/core.c index 994932d5e474..1569979c8912 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1862,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_PHYS_ADDR) size += sizeof(data->phys_addr); + if (sample_type & PERF_SAMPLE_CGROUP) + size += sizeof(data->cgroup); + event->header_size = size; } @@ -6867,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_PHYS_ADDR) perf_output_put(handle, data->phys_addr); + if (sample_type & PERF_SAMPLE_CGROUP) + perf_output_put(handle, data->cgroup); + if (sample_type & PERF_SAMPLE_AUX) { perf_output_put(handle, data->aux_size); @@ -7066,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_PHYS_ADDR) data->phys_addr = perf_virt_to_phys(data->addr); +#ifdef CONFIG_CGROUP_PERF + if (sample_type & PERF_SAMPLE_CGROUP) { + struct cgroup *cgrp; + + /* protected by RCU */ + cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup; + data->cgroup = cgroup_id(cgrp); + } +#endif + if (sample_type & PERF_SAMPLE_AUX) { u64 size; @@ -11264,6 +11280,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->sample_type & PERF_SAMPLE_REGS_INTR) ret = perf_reg_validate(attr->sample_regs_intr); + +#ifndef CONFIG_CGROUP_PERF + if (attr->sample_type & PERF_SAMPLE_CGROUP) + return -EINVAL; +#endif + out: return ret; -- cgit v1.2.3-70-g09d2 From 03590fb409bce428bce0b4535a488dedfbed0c00 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Fri, 27 Mar 2020 11:07:17 -0300 Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h To get the changes in: 6546b19f95ac ("perf/core: Add PERF_SAMPLE_CGROUP feature") 96aaab686505 ("perf/core: Add PERF_RECORD_CGROUP event") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h This update is a prerequisite to adding support for the HW index of raw branch records. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: http://lore.kernel.org/lkml/20200325124536.2800725-4-namhyung@kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/include/uapi/linux/perf_event.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 397cfd65b3fe..7b2d6fc9e6ed 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -142,8 +142,9 @@ enum perf_event_sample_format { PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, + PERF_SAMPLE_CGROUP = 1U << 21, - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -381,7 +382,8 @@ struct perf_event_attr { ksymbol : 1, /* include ksymbol events */ bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ - __reserved_1 : 32; + cgroup : 1, /* include cgroup events */ + __reserved_1 : 31; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1012,6 +1014,16 @@ enum perf_event_type { */ PERF_RECORD_BPF_EVENT = 18, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * char path[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CGROUP = 19, + PERF_RECORD_MAX, /* non-ABI */ }; -- cgit v1.2.3-70-g09d2 From 460c3ed999d700fa84c953f83141f580ef456b3c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Fri, 3 Apr 2020 09:29:52 -0300 Subject: perf python: Include rwsem.c in the pythong biding We'll need it for the cgroup patches, and its better to have it in a separate patch in case we need to later revert the cgroup patches. I.e. without this we have: [root@five ~]# perf test -v python 19: 'import perf' in python : --- start --- test child forked, pid 148447 Traceback (most recent call last): File "<stdin>", line 1, in <module> ImportError: /tmp/build/perf/python/perf.cpython-37m-x86_64-linux-gnu.so: undefined symbol: down_write test child finished with -1 ---- end ---- 'import perf' in python: FAILED! [root@five ~]# Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200403123606.GC23243@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/python-ext-sources | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index e7279ea6043a..a9d9c142eb7c 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -34,3 +34,4 @@ util/string.c util/symbol_fprintf.c util/units.c util/affinity.c +util/rwsem.c -- cgit v1.2.3-70-g09d2 From 49f550ea87c73671ee4d5e08820e0976894dd610 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Thu, 2 Apr 2020 10:52:49 +0900 Subject: perf tools: Add file-handle feature test The file handle (FHANDLE) support is configurable so some systems might not have it. So add a config feature item to check it on build time so that we don't add the cgroup tracking feature based on that. Committer notes: Had to make the test use the same construct as its later use in synthetic-events.c, in the next patch in this series. i.e. make it be: struct { struct file_handle fh; uint64_t cgroup_id; } handle; To cope with: CC /tmp/build/perf/util/cloexec.o util/synthetic-events.c:428:22: error: field 'fh' with CC /tmp/build/perf/util/call-path.o variable sized type 'struct file_handle' not at the end of a struct or class is a GNU extension [-Werror,-Wgnu-variable-sized-type-not-at-end] struct file_handle fh; ^ 1 error generated. Deal with this at some point, i.e. investigate if the right thing is to remove that -Wgnu-variable-sized-type-not-at-end from our CFLAGS, for now do the test the same way as it is used looks more sensible. Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200402015249.3800462-1-namhyung@kernel.org [ split from a larger patch, removed blank line at EOF ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/build/Makefile.feature | 3 ++- tools/build/feature/Makefile | 6 +++++- tools/build/feature/test-file-handle.c | 17 +++++++++++++++++ tools/perf/Makefile.config | 4 ++++ 4 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tools/build/feature/test-file-handle.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 574c2e0b9d20..3e0c019ef297 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -72,7 +72,8 @@ FEATURE_TESTS_BASIC := \ setns \ libaio \ libzstd \ - disassembler-four-args + disassembler-four-args \ + file-handle # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 7ac0d8088565..621f528f7822 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -67,7 +67,8 @@ FILES= \ test-llvm.bin \ test-llvm-version.bin \ test-libaio.bin \ - test-libzstd.bin + test-libzstd.bin \ + test-file-handle.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -321,6 +322,9 @@ $(OUTPUT)test-libaio.bin: $(OUTPUT)test-libzstd.bin: $(BUILD) -lzstd +$(OUTPUT)test-file-handle.bin: + $(BUILD) + ############################### clean: diff --git a/tools/build/feature/test-file-handle.c b/tools/build/feature/test-file-handle.c new file mode 100644 index 000000000000..4d3b03b27a0b --- /dev/null +++ b/tools/build/feature/test-file-handle.c @@ -0,0 +1,17 @@ +#define _GNU_SOURCE +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <inttypes.h> + +int main(void) +{ + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + + name_to_handle_at(AT_FDCWD, "/", &handle.fh, &mount_id, 0); + return 0; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 80e55e796be9..eb95c0c0a169 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -348,6 +348,10 @@ ifeq ($(feature-gettid), 1) CFLAGS += -DHAVE_GETTID endif +ifeq ($(feature-file-handle), 1) + CFLAGS += -DHAVE_FILE_HANDLE +endif + ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 -- cgit v1.2.3-70-g09d2 From ba78c1c5461c2fc2f57b777e971b3a9ec0df5666 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:30 +0900 Subject: perf tools: Basic support for CGROUP event Implement basic functionality to support cgroup tracking. Each cgroup can be identified by inode number which can be read from userspace too. The actual cgroup processing will come in the later patch. Reported-by: kernel test robot <rong.a.chen@intel.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> [ fix perf test failure on sampling parsing ] Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325124536.2800725-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/lib/perf/include/perf/event.h | 7 +++++++ tools/perf/builtin-diff.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/tests/sample-parsing.c | 6 +++++- tools/perf/util/event.c | 18 ++++++++++++++++++ tools/perf/util/event.h | 6 ++++++ tools/perf/util/evsel.c | 6 ++++++ tools/perf/util/machine.c | 12 ++++++++++++ tools/perf/util/machine.h | 3 +++ tools/perf/util/perf_event_attr_fprintf.c | 2 ++ tools/perf/util/session.c | 4 ++++ tools/perf/util/synthetic-events.c | 8 ++++++++ tools/perf/util/tool.h | 1 + 13 files changed, 74 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 18106899cb4e..69b44d2cc0f5 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -105,6 +105,12 @@ struct perf_record_bpf_event { __u8 tag[BPF_TAG_SIZE]; // prog tag }; +struct perf_record_cgroup { + struct perf_event_header header; + __u64 id; + char path[PATH_MAX]; +}; + struct perf_record_sample { struct perf_event_header header; __u64 array[]; @@ -352,6 +358,7 @@ union perf_event { struct perf_record_mmap2 mmap2; struct perf_record_comm comm; struct perf_record_namespaces namespaces; + struct perf_record_cgroup cgroup; struct perf_record_fork fork; struct perf_record_lost lost; struct perf_record_lost_samples lost_samples; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 5e697cd2224a..c94a002f295e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -455,6 +455,7 @@ static struct perf_diff pdiff = { .fork = perf_event__process_fork, .lost = perf_event__process_lost, .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ea673b7eb3f4..26d8fc27e427 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1105,6 +1105,7 @@ int cmd_report(int argc, const char **argv) .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 14239e472187..61865699c3f4 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -151,6 +151,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_PHYS_ADDR) COMP(phys_addr); + if (type & PERF_SAMPLE_CGROUP) + COMP(cgroup); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -230,6 +233,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .regs = regs, }, .phys_addr = 113, + .cgroup = 114, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, @@ -336,7 +340,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index c5447ff516a2..824c038e5c33 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -54,6 +54,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_NAMESPACES] = "NAMESPACES", [PERF_RECORD_KSYMBOL] = "KSYMBOL", [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", + [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -180,6 +181,12 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp) return ret; } +size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n", + event->cgroup.id, event->cgroup.path); +} + int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -196,6 +203,14 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, return machine__process_namespaces_event(machine, event, sample); } +int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_cgroup_event(machine, event, sample); +} + int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -417,6 +432,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_NAMESPACES: ret += perf_event__fprintf_namespaces(event, fp); break; + case PERF_RECORD_CGROUP: + ret += perf_event__fprintf_cgroup(event, fp); + break; case PERF_RECORD_MMAP2: ret += perf_event__fprintf_mmap2(event, fp); break; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 3cda40a2fafc..b8289f160f07 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -135,6 +135,7 @@ struct perf_sample { u32 raw_size; u64 data_src; u64 phys_addr; + u64 cgroup; u32 flags; u16 insn_len; u8 cpumode; @@ -322,6 +323,10 @@ int perf_event__process_namespaces(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_cgroup(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -377,6 +382,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 15ccd193483f..b766eb608b97 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2267,6 +2267,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->cgroup = 0; + if (type & PERF_SAMPLE_CGROUP) { + data->cgroup = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fd14f1489802..399b4731b246 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -654,6 +654,16 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused, return err; } +int machine__process_cgroup_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_cgroup(event, stdout); + + return 0; +} + int machine__process_lost_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { @@ -1878,6 +1888,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_mmap_event(machine, event, sample); break; case PERF_RECORD_NAMESPACES: ret = machine__process_namespaces_event(machine, event, sample); break; + case PERF_RECORD_CGROUP: + ret = machine__process_cgroup_event(machine, event, sample); break; case PERF_RECORD_MMAP2: ret = machine__process_mmap2_event(machine, event, sample); break; case PERF_RECORD_FORK: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index be0a930eca89..fa1be9ea00fa 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -128,6 +128,9 @@ int machine__process_switch_event(struct machine *machine, int machine__process_namespaces_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_cgroup_event(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 355d3458d4e6..b94fa07f5d32 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,6 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), + bit_name(CGROUP), { .name = NULL, } }; #undef bit_name @@ -132,6 +133,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(ksymbol, p_unsigned); PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRf(aux_output, p_unsigned); + PRINT_ATTRf(cgroup, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 055b00abd56d..0b0bfe5bef17 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -471,6 +471,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->comm = process_event_stub; if (tool->namespaces == NULL) tool->namespaces = process_event_stub; + if (tool->cgroup == NULL) + tool->cgroup = process_event_stub; if (tool->fork == NULL) tool->fork = process_event_stub; if (tool->exit == NULL) @@ -1436,6 +1438,8 @@ static int machines__deliver_event(struct machines *machines, return tool->comm(tool, event, sample, machine); case PERF_RECORD_NAMESPACES: return tool->namespaces(tool, event, sample, machine); + case PERF_RECORD_CGROUP: + return tool->cgroup(tool, event, sample, machine); case PERF_RECORD_FORK: return tool->fork(tool, event, sample, machine); case PERF_RECORD_EXIT: diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3f28af39f9c6..f72d80999506 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1230,6 +1230,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_PHYS_ADDR) result += sizeof(u64); + if (type & PERF_SAMPLE_CGROUP) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1404,6 +1407,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_CGROUP) { + *array = sample->cgroup; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 2abbf668b8de..472ef5eb4068 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -46,6 +46,7 @@ struct perf_tool { mmap2, comm, namespaces, + cgroup, fork, exit, lost, -- cgit v1.2.3-70-g09d2 From d1277aa36bff4bfc1a187a469fc6a6a1d17cf59c Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:31 +0900 Subject: perf cgroup: Maintain cgroup hierarchy Each cgroup is kept in the perf_env's cgroup_tree sorted by the cgroup id. Hist entries have cgroup id can compare it directly and later it can be used to find a group name using this tree. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325124536.2800725-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/cgroup.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cgroup.h | 17 +++++++--- tools/perf/util/env.c | 2 ++ tools/perf/util/env.h | 6 ++++ tools/perf/util/machine.c | 9 +++++- 5 files changed, 109 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 5bc9d3b01bd9..b73fb7823048 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -191,3 +191,83 @@ int parse_cgroups(const struct option *opt, const char *str, } return 0; } + +static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id, + bool create, const char *path) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct cgroup *cgrp; + + while (*p != NULL) { + parent = *p; + cgrp = rb_entry(parent, struct cgroup, node); + + if (cgrp->id == id) + return cgrp; + + if (cgrp->id < id) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + if (!create) + return NULL; + + cgrp = malloc(sizeof(*cgrp)); + if (cgrp == NULL) + return NULL; + + cgrp->name = strdup(path); + if (cgrp->name == NULL) { + free(cgrp); + return NULL; + } + + cgrp->fd = -1; + cgrp->id = id; + refcount_set(&cgrp->refcnt, 1); + + rb_link_node(&cgrp->node, parent, p); + rb_insert_color(&cgrp->node, root); + + return cgrp; +} + +struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, + const char *path) +{ + struct cgroup *cgrp; + + down_write(&env->cgroups.lock); + cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path); + up_write(&env->cgroups.lock); + return cgrp; +} + +struct cgroup *cgroup__find(struct perf_env *env, uint64_t id) +{ + struct cgroup *cgrp; + + down_read(&env->cgroups.lock); + cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL); + up_read(&env->cgroups.lock); + return cgrp; +} + +void perf_env__purge_cgroups(struct perf_env *env) +{ + struct rb_node *node; + struct cgroup *cgrp; + + down_write(&env->cgroups.lock); + while (!RB_EMPTY_ROOT(&env->cgroups.tree)) { + node = rb_first(&env->cgroups.tree); + cgrp = rb_entry(node, struct cgroup, node); + + rb_erase(node, &env->cgroups.tree); + cgroup__put(cgrp); + } + up_write(&env->cgroups.lock); +} diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 2ec11f01090d..e98d5975fe55 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -3,16 +3,19 @@ #define __CGROUP_H__ #include <linux/refcount.h> +#include <linux/rbtree.h> +#include "util/env.h" struct option; struct cgroup { - char *name; - int fd; - refcount_t refcnt; + struct rb_node node; + u64 id; + char *name; + int fd; + refcount_t refcnt; }; - extern int nr_cgroups; /* number of explicit cgroups defined */ struct cgroup *cgroup__get(struct cgroup *cgroup); @@ -26,4 +29,10 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); int parse_cgroups(const struct option *opt, const char *str, int unset); +struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, + const char *path); +struct cgroup *cgroup__find(struct perf_env *env, uint64_t id); + +void perf_env__purge_cgroups(struct perf_env *env); + #endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4154f944f474..fadc59708ece 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -6,6 +6,7 @@ #include <linux/ctype.h> #include <linux/zalloc.h> #include "bpf-event.h" +#include "cgroup.h" #include <errno.h> #include <sys/utsname.h> #include <bpf/libbpf.h> @@ -168,6 +169,7 @@ void perf_env__exit(struct perf_env *env) int i; perf_env__purge_bpf(env); + perf_env__purge_cgroups(env); zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 11d05ae3606a..7632075a8792 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -88,6 +88,12 @@ struct perf_env { u32 btfs_cnt; } bpf_progs; + /* same reason as above (for perf-top) */ + struct { + struct rw_semaphore lock; + struct rb_root tree; + } cgroups; + /* For fast cpu to numa node lookup via perf_env__numa_node */ int *numa_map; int nr_numa_map; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 399b4731b246..97142e9671be 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -33,6 +33,7 @@ #include "asm/bug.h" #include "bpf-event.h" #include <internal/lib.h> // page_size +#include "cgroup.h" #include <linux/ctype.h> #include <symbol/kallsyms.h> @@ -654,13 +655,19 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused, return err; } -int machine__process_cgroup_event(struct machine *machine __maybe_unused, +int machine__process_cgroup_event(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct cgroup *cgrp; + if (dump_trace) perf_event__fprintf_cgroup(event, stdout); + cgrp = cgroup__findnew(machine->env, event->cgroup.id, event->cgroup.path); + if (cgrp == NULL) + return -ENOMEM; + return 0; } -- cgit v1.2.3-70-g09d2 From b629f3e9d01b4e4ad4e84c8f76fad514967a83da Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:32 +0900 Subject: perf report: Add 'cgroup' sort key The cgroup sort key is to show cgroup membership of each task. Currently it shows full path in the cgroupfs (not relative to the root of cgroup namespace) since it'd be more intuitive IMHO. Otherwise root cgroup in different namespaces will all show same name - "/". The cgroup sort key should come before cgroup_id otherwise sort_dimension__add() will match it to cgroup_id as it only matches with the given substring. For example it will look like following. Note that record patch adding --all-cgroups patch will come later. $ perf record -a --namespace --all-cgroups cgtest [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.208 MB perf.data (4090 samples) ] $ perf report -s cgroup_id,cgroup,pid ... # Overhead cgroup id (dev/inode) Cgroup Pid:Command # ........ ..................... .......... ............... # 93.96% 0/0x0 / 0:swapper 1.25% 3/0xeffffffb / 278:looper0 0.86% 3/0xf000015f /sub/cgrp1 280:cgtest 0.37% 3/0xf0000160 /sub/cgrp2 281:cgtest 0.34% 3/0xf0000163 /sub/cgrp3 282:cgtest 0.22% 3/0xeffffffb /sub 278:looper0 0.20% 3/0xeffffffb / 280:cgtest 0.15% 3/0xf0000163 /sub/cgrp3 285:looper3 Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325124536.2800725-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.c | 13 +++++++++++ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 37 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 5 files changed, 54 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index e56e3f1344a7..f569b9ea4002 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -95,6 +95,7 @@ OPTIONS abort cost. This is the global weight. - local_weight: Local weight version of the weight above. - cgroup_id: ID derived from cgroup namespace device and inode numbers. + - cgroup: cgroup pathname in the cgroupfs. - transaction: Transaction abort flags. - overhead: Overhead percentage of sample - overhead_sys: Overhead percentage of sample running in system mode diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e74a5acf66d9..283a69ff6a3d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -10,6 +10,7 @@ #include "mem-events.h" #include "session.h" #include "namespaces.h" +#include "cgroup.h" #include "sort.h" #include "units.h" #include "evlist.h" @@ -194,6 +195,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + hists__new_col_len(hists, HISTC_CGROUP, 6); hists__new_col_len(hists, HISTC_CGROUP_ID, 20); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); @@ -222,6 +224,16 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->trace_output) hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output)); + + if (h->cgroup) { + const char *cgrp_name = "unknown"; + struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env, + h->cgroup); + if (cgrp != NULL) + cgrp_name = cgrp->name; + + hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name)); + } } void hists__output_recalc_col_len(struct hists *hists, int max_rows) @@ -691,6 +703,7 @@ __hists__add_entry(struct hists *hists, .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0, .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0, }, + .cgroup = sample->cgroup, .ms = { .maps = al->maps, .map = al->map, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index bb994e030495..4141295a66fa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -38,6 +38,7 @@ enum hist_column { HISTC_THREAD, HISTC_COMM, HISTC_CGROUP_ID, + HISTC_CGROUP, HISTC_PARENT, HISTC_CPU, HISTC_SOCKET, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index e860595576c2..f14cc728c358 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -12,6 +12,7 @@ #include "cacheline.h" #include "comm.h" #include "map.h" +#include "maps.h" #include "symbol.h" #include "map_symbol.h" #include "branch.h" @@ -25,6 +26,8 @@ #include "mem-events.h" #include "annotate.h" #include "time-utils.h" +#include "cgroup.h" +#include "machine.h" #include <linux/kernel.h> #include <linux/string.h> @@ -634,6 +637,39 @@ struct sort_entry sort_cgroup_id = { .se_width_idx = HISTC_CGROUP_ID, }; +/* --sort cgroup */ + +static int64_t +sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->cgroup - left->cgroup; +} + +static int hist_entry__cgroup_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width __maybe_unused) +{ + const char *cgrp_name = "N/A"; + + if (he->cgroup) { + struct cgroup *cgrp = cgroup__find(he->ms.maps->machine->env, + he->cgroup); + if (cgrp != NULL) + cgrp_name = cgrp->name; + else + cgrp_name = "unknown"; + } + + return repsep_snprintf(bf, size, "%s", cgrp_name); +} + +struct sort_entry sort_cgroup = { + .se_header = "Cgroup", + .se_cmp = sort__cgroup_cmp, + .se_snprintf = hist_entry__cgroup_snprintf, + .se_width_idx = HISTC_CGROUP, +}; + /* --sort socket */ static int64_t @@ -1660,6 +1696,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_TRACE, "trace", sort_trace), DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), + DIM(SORT_CGROUP, "cgroup", sort_cgroup), DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), DIM(SORT_TIME, "time", sort_time), diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 6c862d62d052..cfa6ac6f7d06 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -101,6 +101,7 @@ struct hist_entry { struct thread *thread; struct comm *comm; struct namespace_id cgroup_id; + u64 cgroup; u64 ip; u64 transaction; s32 socket; @@ -224,6 +225,7 @@ enum sort_type { SORT_TRACE, SORT_SYM_SIZE, SORT_DSO_SIZE, + SORT_CGROUP, SORT_CGROUP_ID, SORT_SYM_IPC_NULL, SORT_TIME, -- cgit v1.2.3-70-g09d2 From ab64069f1a6604a43daec5fc4a4294b0b77a8b93 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:33 +0900 Subject: perf record: Support synthesizing cgroup events Synthesize cgroup events by iterating cgroup filesystem directories. The cgroup event only saves the portion of cgroup path after the mount point and the cgroup id (which actually is a file handle). Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325124536.2800725-7-namhyung@kernel.org Link: http://lore.kernel.org/lkml/20200402015249.3800462-1-namhyung@kernel.org [ Extracted the HAVE_FILE_HANDLE from the followup patch, added missing __maybe_unused ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-record.c | 5 ++ tools/perf/util/synthetic-events.c | 122 +++++++++++++++++++++++++++++++++++++ tools/perf/util/synthetic-events.h | 1 + tools/perf/util/tool.h | 1 + 4 files changed, 129 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4c301466101b..2802de9538ff 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1397,6 +1397,11 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); + err = perf_event__synthesize_cgroups(tool, process_synthesized_event, + machine); + if (err < 0) + pr_warning("Couldn't synthesize cgroup events.\n"); + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, process_synthesized_event, opts->sample_address, 1); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index f72d80999506..a661b122d9d8 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -16,6 +16,7 @@ #include "util/synthetic-events.h" #include "util/target.h" #include "util/time-utils.h" +#include "util/cgroup.h" #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/string.h> @@ -414,6 +415,127 @@ out: return rc; } +#ifdef HAVE_FILE_HANDLE +static int perf_event__synthesize_cgroup(struct perf_tool *tool, + union perf_event *event, + char *path, size_t mount_len, + perf_event__handler_t process, + struct machine *machine) +{ + size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path); + size_t path_len = strlen(path) - mount_len + 1; + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + + while (path_len % sizeof(u64)) + path[mount_len + path_len++] = '\0'; + + memset(&event->cgroup, 0, event_size); + + event->cgroup.header.type = PERF_RECORD_CGROUP; + event->cgroup.header.size = event_size + path_len + machine->id_hdr_size; + + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) { + pr_debug("stat failed: %s\n", path); + return -1; + } + + event->cgroup.id = handle.cgroup_id; + strncpy(event->cgroup.path, path + mount_len, path_len); + memset(event->cgroup.path + path_len, 0, machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) < 0) { + pr_debug("process synth event failed\n"); + return -1; + } + + return 0; +} + +static int perf_event__walk_cgroup_tree(struct perf_tool *tool, + union perf_event *event, + char *path, size_t mount_len, + perf_event__handler_t process, + struct machine *machine) +{ + size_t pos = strlen(path); + DIR *d; + struct dirent *dent; + int ret = 0; + + if (perf_event__synthesize_cgroup(tool, event, path, mount_len, + process, machine) < 0) + return -1; + + d = opendir(path); + if (d == NULL) { + pr_debug("failed to open directory: %s\n", path); + return -1; + } + + while ((dent = readdir(d)) != NULL) { + if (dent->d_type != DT_DIR) + continue; + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + /* any sane path should be less than PATH_MAX */ + if (strlen(path) + strlen(dent->d_name) + 1 >= PATH_MAX) + continue; + + if (path[pos - 1] != '/') + strcat(path, "/"); + strcat(path, dent->d_name); + + ret = perf_event__walk_cgroup_tree(tool, event, path, + mount_len, process, machine); + if (ret < 0) + break; + + path[pos] = '\0'; + } + + closedir(d); + return ret; +} + +int perf_event__synthesize_cgroups(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event; + char cgrp_root[PATH_MAX]; + size_t mount_len; /* length of mount point in the path */ + + if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) { + pr_debug("cannot find cgroup mount point\n"); + return -1; + } + + mount_len = strlen(cgrp_root); + /* make sure the path starts with a slash (after mount point) */ + strcat(cgrp_root, "/"); + + if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len, + process, machine) < 0) + return -1; + + return 0; +} +#else +int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return -1; +} +#endif + int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index baead0cdc381..e7a3e9589738 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -45,6 +45,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handl int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample); int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 472ef5eb4068..3fb67bd31e4a 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -79,6 +79,7 @@ struct perf_tool { bool ordered_events; bool ordering_requires_timestamps; bool namespace_events; + bool cgroup_events; bool no_warn; enum show_feature_header show_feat_hdr; }; -- cgit v1.2.3-70-g09d2 From 8fb4b67939e169fca68174e9ac7be79fe9a04498 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:34 +0900 Subject: perf record: Add --all-cgroups option The --all-cgroups option is to enable cgroup profiling support. It tells kernel to record CGROUP events in the ring buffer so that perf report can identify task/cgroup association later. [root@seventh ~]# perf record --all-cgroups --namespaces /wb/cgtest [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.042 MB perf.data (558 samples) ] [root@seventh ~]# perf report --stdio -s cgroup_id,cgroup,pid # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 558 of event 'cycles' # Event count (approx.): 458017341 # # Overhead cgroup id (dev/inode) Cgroup Pid:Command # ........ ..................... .......... ............... # 33.15% 4/0xeffffffb /sub 9615:looper0 32.83% 4/0xf00002f5 /sub/cgrp2 9620:looper2 32.79% 4/0xf00002f4 /sub/cgrp1 9619:looper1 0.35% 4/0xf00002f5 /sub/cgrp2 9618:cgtest 0.34% 4/0xf00002f4 /sub/cgrp1 9617:cgtest 0.32% 4/0xeffffffb / 9615:looper0 0.11% 4/0xeffffffb /sub 9617:cgtest 0.10% 4/0xeffffffb /sub 9618:cgtest # # (Tip: Sample related events with: perf record -e '{cycles,instructions}:S') # [root@seventh ~]# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325124536.2800725-8-namhyung@kernel.org Link: http://lore.kernel.org/lkml/20200402015249.3800462-1-namhyung@kernel.org [ Extracted the HAVE_FILE_HANDLE from the followup patch ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-record.txt | 5 ++++- tools/perf/builtin-record.c | 11 +++++++++++ tools/perf/util/evsel.c | 11 ++++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/record.h | 1 + 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b25e028458e2..b3f3b3f1c161 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -391,7 +391,10 @@ displayed with the weight and local_weight sort keys. This currently works for abort events and some memory events in precise mode on modern Intel CPUs. --namespaces:: -Record events of type PERF_RECORD_NAMESPACES. +Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key. + +--all-cgroups:: +Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key. --transaction:: Record transaction flags for transaction related events. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2802de9538ff..1ab349abe904 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1433,6 +1433,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (rec->opts.record_namespaces) tool->namespace_events = true; + if (rec->opts.record_cgroup) { +#ifdef HAVE_FILE_HANDLE + tool->cgroup_events = true; +#else + pr_err("cgroup tracking is not supported\n"); + return -1; +#endif + } + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) @@ -2363,6 +2372,8 @@ static struct option __record_options[] = { "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, "Record namespaces events"), + OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, + "Record cgroup events"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b766eb608b97..eb880efbce16 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1104,6 +1104,11 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->record_namespaces) attr->namespaces = track; + if (opts->record_cgroup) { + attr->cgroup = track && !perf_missing_features.cgroup; + perf_evsel__set_sample_bit(evsel, CGROUP); + } + if (opts->record_switch_events) attr->context_switch = track; @@ -1789,7 +1794,11 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.branch_hw_idx && + if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.branch_hw_idx && (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { perf_missing_features.branch_hw_idx = true; pr_debug2("switching off branch HW index support\n"); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 33804740e2ca..53187c501ee8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -120,6 +120,7 @@ struct perf_missing_features { bool bpf; bool aux_output; bool branch_hw_idx; + bool cgroup; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 5421fd2ad383..24316458be20 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -34,6 +34,7 @@ struct record_opts { bool auxtrace_snapshot_on_exit; bool auxtrace_sample_mode; bool record_namespaces; + bool record_cgroup; bool record_switch_events; bool all_kernel; bool all_user; -- cgit v1.2.3-70-g09d2 From f382842fa0244ae1e2c28c8377732c85ec1fe7a9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:35 +0900 Subject: perf top: Add --all-cgroups option The --all-cgroups option is to enable cgroup profiling support. It tells kernel to record CGROUP events in the ring buffer so that 'perf top' can identify task/cgroup association later. Committer testing: Use: # perf top --all-cgroups -s cgroup_id,cgroup,pid Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325124536.2800725-9-namhyung@kernel.org Link: http://lore.kernel.org/lkml/20200402015249.3800462-1-namhyung@kernel.org [ Extracted the HAVE_FILE_HANDLE from the followup patch ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-top.txt | 4 ++++ tools/perf/builtin-top.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 324b6b53c86b..ddab103af8c7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -272,6 +272,10 @@ Default is to monitor all CPUS. Record events of type PERF_RECORD_NAMESPACES and display it with the 'cgroup_id' sort key. +--all-cgroups:: + Record events of type PERF_RECORD_CGROUP and display it with the + 'cgroup' sort key. + --switch-on EVENT_NAME:: Only consider events after this event is found. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d2539b793f9d..02ea2cf2a3d9 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1246,6 +1246,14 @@ static int __cmd_top(struct perf_top *top) if (opts->record_namespaces) top->tool.namespace_events = true; + if (opts->record_cgroup) { +#ifdef HAVE_FILE_HANDLE + top->tool.cgroup_events = true; +#else + pr_err("cgroup tracking is not supported.\n"); + return -1; +#endif + } ret = perf_event__synthesize_bpf_events(top->session, perf_event__process, &top->session->machines.host, @@ -1253,6 +1261,11 @@ static int __cmd_top(struct perf_top *top) if (ret < 0) pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n"); + ret = perf_event__synthesize_cgroups(&top->tool, perf_event__process, + &top->session->machines.host); + if (ret < 0) + pr_debug("Couldn't synthesize cgroup events.\n"); + machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->core.threads, false, top->nr_threads_synthesize); @@ -1545,6 +1558,8 @@ int cmd_top(int argc, const char **argv) "number of thread to run event synthesize"), OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces, "Record namespaces events"), + OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup, + "Record cgroup events"), OPTS_EVSWITCH(&top.evswitch), OPT_END() }; -- cgit v1.2.3-70-g09d2 From 160d4af97b831c87e680c607c639d55ae027087f Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Wed, 25 Mar 2020 21:45:36 +0900 Subject: perf script: Add --show-cgroup-events option The --show-cgroup-events option is to print CGROUP events in the output like others. Committer testing: [root@seventh ~]# perf record --all-cgroups --namespaces /wb/cgtest [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.039 MB perf.data (487 samples) ] [root@seventh ~]# perf script --show-cgroup-events | grep PERF_RECORD_CGROUP -B2 -A2 swapper 0 0.000000: PERF_RECORD_CGROUP cgroup: 1 / perf 12145 11200.440730: 1 cycles: ffffffffb900d58b __intel_pmu_enable_all.constprop.0+0x3b (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) perf 12145 11200.440733: 1 cycles: ffffffffb900d58b __intel_pmu_enable_all.constprop.0+0x3b (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) -- cgtest 12145 11200.440739: 193472 cycles: ffffffffb90f6fbc commit_creds+0x1fc (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) cgtest 12145 11200.440790: 2691608 cycles: 7fa2cb43019b _dl_sysdep_start+0x7cb (/usr/lib64/ld-2.29.so) cgtest 12145 11200.440962: PERF_RECORD_CGROUP cgroup: 83 /sub cgtest 12147 11200.441054: 1 cycles: ffffffffb900d58b __intel_pmu_enable_all.constprop.0+0x3b (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) cgtest 12147 11200.441057: 1 cycles: ffffffffb900d58b __intel_pmu_enable_all.constprop.0+0x3b (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) -- cgtest 12148 11200.441103: 10227 cycles: ffffffffb9a0153d end_repeat_nmi+0x48 (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) cgtest 12148 11200.441106: 273295 cycles: ffffffffb99ecbc7 copy_page+0x7 (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) cgtest 12147 11200.441133: PERF_RECORD_CGROUP cgroup: 88 /sub/cgrp1 cgtest 12147 11200.441143: 2788845 cycles: ffffffffb94676c2 security_genfs_sid+0x102 (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) cgtest 12148 11200.441162: PERF_RECORD_CGROUP cgroup: 93 /sub/cgrp2 cgtest 12148 11200.441182: 2669546 cycles: 401020 _init+0x20 (/wb/cgtest) cgtest 12149 11200.441247: 1 cycles: ffffffffb900d58b __intel_pmu_enable_all.constprop.0+0x3b (/lib/modules/5.6.0-rc6-00008-gfe2413eefd7f/build/vmlinux) [root@seventh ~]# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325124536.2800725-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4af255bb0977..99a9853a11ba 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -319,6 +319,9 @@ OPTIONS --show-bpf-events Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT. +--show-cgroup-events + Display cgroup events i.e. events of type PERF_RECORD_CGROUP. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f63869aef914..186ebf827fa1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1694,6 +1694,7 @@ struct perf_script { bool show_lost_events; bool show_round_events; bool show_bpf_events; + bool show_cgroup_events; bool allocated; bool per_event_dump; struct evswitch evswitch; @@ -2212,6 +2213,41 @@ out: return ret; } +static int process_cgroup_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + int ret = -1; + + thread = machine__findnew_thread(machine, sample->pid, sample->tid); + if (thread == NULL) { + pr_debug("problem processing CGROUP event, skipping it.\n"); + return -1; + } + + if (perf_event__process_cgroup(tool, event, sample, machine) < 0) + goto out; + + if (!evsel->core.attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + } + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_CGROUP, stdout); + perf_event__fprintf(event, stdout); + } + ret = 0; +out: + thread__put(thread); + return ret; +} + static int process_fork_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -2551,6 +2587,8 @@ static int __cmd_script(struct perf_script *script) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; + if (script->show_cgroup_events) + script->tool.cgroup = process_cgroup_event; if (script->show_lost_events) script->tool.lost = process_lost_event; if (script->show_round_events) { @@ -3476,6 +3514,7 @@ int cmd_script(int argc, const char **argv) .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .attr = process_attr, @@ -3577,6 +3616,8 @@ int cmd_script(int argc, const char **argv) "Show context switch events (if recorded)"), OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), + OPT_BOOLEAN('\0', "show-cgroup-events", &script.show_cgroup_events, + "Show cgroup events (if recorded)"), OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, "Show lost events (if recorded)"), OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events, -- cgit v1.2.3-70-g09d2 From 7b1642f2fc1e1f20948e806b7ce319f660633342 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Mon, 30 Mar 2020 09:32:41 -0300 Subject: perf build-test: Honour JOBS to override detection of number of cores When one does: $ make -C tools/perf build-test The makefile in tools/perf/tests/ will, just like the main one, detect how many cores are in the system and use it with -j. Sometimes we may need to override that, for instance, when using icecream or distcc to use multiple machines in the build process, then we need to, as with the main makefile, use: $ make JOBS=N -C tools/perf build-test Fix the tests makefile to honour that. Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Link: http://lore.kernel.org/lkml/20200330130301.GA31702@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/tests/make | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index c850d1664c56..5d0c3a9c47a1 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -28,9 +28,13 @@ endif PARALLEL_OPT= ifeq ($(SET_PARALLEL),1) - cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null) - ifeq ($(cores),0) - cores := 1 + ifeq ($(JOBS),) + cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null) + ifeq ($(cores),0) + cores := 1 + endif + else + cores=$(JOBS) endif PARALLEL_OPT="-j$(cores)" endif -- cgit v1.2.3-70-g09d2 From 78886f3ed37e89a06c76b95be873573e27900979 Mon Sep 17 00:00:00 2001 From: Kemeng Shi <shikemeng@huawei.com> Date: Mon, 30 Mar 2020 15:41:11 +0800 Subject: perf symbols: Fix arm64 gap between kernel start and module end During execution of command 'perf report' in my arm64 virtual machine, this error message is showed: failed to process sample __symbol__inc_addr_samples(860): ENOMEM! sym->name=__this_module, start=0x1477100, addr=0x147dbd8, end=0x80002000, func: 0 The error is caused with path: cmd_report __cmd_report perf_session__process_events __perf_session__process_events ordered_events__flush __ordered_events__flush oe->deliver (ordered_events__deliver_event) perf_session__deliver_event machines__deliver_event perf_evlist__deliver_sample tool->sample (process_sample_event) hist_entry_iter__add iter->add_entry_cb(hist_iter__report_callback) hist_entry__inc_addr_samples symbol__inc_addr_samples __symbol__inc_addr_samples h = annotated_source__histogram(src, evidx) (NULL) annotated_source__histogram failed is caused with path: ... hist_entry__inc_addr_samples symbol__inc_addr_samples symbol__hists annotated_source__alloc_histograms src->histograms = calloc(nr_hists, sizeof_sym_hist) (failed) Calloc failed as the symbol__size(sym) is too huge. As show in error message: start=0x1477100, end=0x80002000, size of symbol is about 2G. This is the same problem as 'perf annotate: Fix s390 gap between kernel end and module start (b9c0a64901d5bd)'. Perf gets symbol information from /proc/kallsyms in __dso__load_kallsyms. A part of symbol in /proc/kallsyms from my virtual machine is as follows: #cat /proc/kallsyms | sort ... ffff000001475080 d rpfilter_mt_reg [ip6t_rpfilter] ffff000001475100 d $d [ip6t_rpfilter] ffff000001475100 d __this_module [ip6t_rpfilter] ffff000080080000 t _head ffff000080080000 T _text ffff000080080040 t pe_header ... Take line 'ffff000001475100 d __this_module [ip6t_rpfilter]' as example. The start and end of symbol are both set to ffff000001475100 in dso__load_all_kallsyms. Then symbols__fixup_end will set the end of symbol to next big address to ffff000001475100 in /proc/kallsyms, ffff000080080000 in this example. Then sizeof of symbol will be about 2G and cause the problem. The start of module in my machine is ffff000000a62000 t $x [dm_mod] The start of kernel in my machine is ffff000080080000 t _head There is a big gap between end of module and begin of kernel if a samll amount of memory is used by module. And the last symbol in module will have a large address range as caotaining the big gap. Give that the module and kernel text segment sequence may change in the future, fix this by limiting range of last symbol in module and kernel to 4K in arch arm64. Signed-off-by: Kemeng Shi <shikemeng@huawei.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Hewenliang <hewenliang4@huawei.com> Cc: Hu Shiyuan <hushiyuan@huawei.com> Cc: Ian Rogers <irogers@google.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nick Desaulniers <ndesaulniers@google.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Richter <tmricht@linux.ibm.com> Link: http://lore.kernel.org/lkml/33fd24c4-0d5a-9d93-9b62-dffa97c992ca@huawei.com [ refreshed the patch on current codebase, added string.h include as strchr() is used ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/machine.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tools/perf/arch/arm64/util/machine.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 789956f76d85..5c13438c7bd4 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ perf-y += header.o +perf-y += machine.o perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c new file mode 100644 index 000000000000..d41b27e781d3 --- /dev/null +++ b/tools/perf/arch/arm64/util/machine.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include "debug.h" +#include "symbol.h" + +/* On arm64, kernel text segment start at high memory address, + * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory + * address, like 0xffff 0000 00ax xxxx. When only samll amount of + * memory is used by modules, gap between end of module's text segment + * and start of kernel text segment may be reach 2G. + * Therefore do not fill this gap and do not assign it to the kernel dso map. + */ + +#define SYMBOL_LIMIT (1 << 12) /* 4K */ + +void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) +{ + if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) || + (strchr(p->name, '[') == NULL && strchr(c->name, '['))) + /* Limit range of last symbol in module and kernel */ + p->end += SYMBOL_LIMIT; + else + p->end = c->start; + pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); +} -- cgit v1.2.3-70-g09d2 From df7deb2cceef0546ab2115702da3421b7c61a8c0 Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Wed, 25 Mar 2020 06:07:10 +0800 Subject: perf top: Support --group-sort-idx to change the sort order 'perf report' supports the option --group-sort-idx, which sorts the output by the event at the index n in event group. For example: perf record -e cycles,instructions,cache-misses perf report --group --group-sort-idx 2 --stdio The perf-report output is sorted by cache-misses. This patch supports --group-sort-idx in perf-top. For example: perf top --group -e cycles,instructions,cache-misses --group-sort-idx 2 The perf-top output is sorted by cache-misses. Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jin Yao <yao.jin@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200324220711.6025-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-top.txt | 5 +++++ tools/perf/builtin-top.c | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index ddab103af8c7..487737a725e9 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -53,6 +53,11 @@ Default is to monitor all CPUS. --group:: Put the counters into a counter group. +--group-sort-idx:: + Sort the output by the event at the index n in group. If n is invalid, + sort by the first event. It can support multiple groups with different + amount of events. WARNING: This should be used on grouped events. + -F <freq>:: --freq=<freq>:: Profile at this frequency. Use 'max' to use the currently maximum diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 02ea2cf2a3d9..9ff7943d2014 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1560,6 +1560,10 @@ int cmd_top(int argc, const char **argv) "Record namespaces events"), OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup, "Record cgroup events"), + OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx, + "Sort the output by the event at the index n in group. " + "If n is invalid, sort by the first event. " + "WARNING: should be used on grouped events."), OPTS_EVSWITCH(&top.evswitch), OPT_END() }; -- cgit v1.2.3-70-g09d2 From 2605af0f32d1bf434dd6819732c7851a97f5cbc0 Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Wed, 25 Mar 2020 06:07:11 +0800 Subject: perf top: Support hotkey to change sort order It would be nice if we can use a hotkey in perf top browser to select a event for sorting. For example: perf top --group -e cycles,instructions,cache-misses Samples Overhead Shared Object Symbol 40.03% 45.71% 0.03% div [.] main 20.46% 14.67% 0.21% libc-2.27.so [.] __random_r 20.01% 19.54% 0.02% libc-2.27.so [.] __random 9.68% 10.68% 0.00% div [.] compute_flag 4.32% 4.70% 0.00% libc-2.27.so [.] rand 3.84% 3.43% 0.00% div [.] rand@plt 0.05% 0.05% 2.33% libc-2.27.so [.] __strcmp_sse2_unaligned 0.04% 0.08% 2.43% perf [.] perf_hpp__is_dynamic_en 0.04% 0.02% 6.64% perf [.] rb_next 0.04% 0.01% 3.87% perf [.] dso__find_symbol 0.04% 0.04% 1.77% perf [.] sort__dso_cmp When user press hotkey '2' (event index, starting from 0), it indicates to sort output by the third event in group (cache-misses). Samples Overhead Shared Object Symbol 4.07% 1.28% 6.68% perf [.] rb_next 3.57% 3.98% 4.11% perf [.] __hists__insert_output 3.67% 11.24% 3.60% perf [.] perf_hpp__is_dynamic_e 3.67% 3.20% 3.20% perf [.] hpp__sort_overhead 0.81% 0.06% 3.01% perf [.] dso__find_symbol 1.62% 5.47% 2.51% perf [.] hists__match 2.70% 1.86% 2.47% libc-2.27.so [.] _int_malloc 0.19% 0.00% 2.29% [kernel] [k] copy_page 0.41% 0.32% 1.98% perf [.] hists__decay_entries 1.84% 3.67% 1.68% perf [.] sort__dso_cmp 0.16% 0.00% 1.63% [kernel] [k] clear_page_erms Now the output is sorted by cache-misses. v2: --- Zero the history if hotkey is pressed. Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jin Yao <yao.jin@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200324220711.6025-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-top.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 9ff7943d2014..289cf83e658a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -616,6 +616,7 @@ static void *display_thread_tui(void *arg) .arg = top, .refresh = top->delay_secs, }; + int ret; /* In order to read symbols from other namespaces perf to needs to call * setns(2). This isn't permitted if the struct_fs has multiple users. @@ -626,6 +627,7 @@ static void *display_thread_tui(void *arg) prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0); +repeat: perf_top__sort_new_samples(top); /* @@ -638,13 +640,18 @@ static void *display_thread_tui(void *arg) hists->uid_filter_str = top->record_opts.target.uid_str; } - perf_evlist__tui_browse_hists(top->evlist, help, &hbt, + ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, &top->session->header.env, !top->record_opts.overwrite, &top->annotation_opts); - stop_top(); + if (ret == K_RELOAD) { + top->zero = true; + goto repeat; + } else + stop_top(); + return NULL; } -- cgit v1.2.3-70-g09d2 From 376c3c22e2ba69d7509aa070f1a7dd88efdb0a9c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Mon, 30 Mar 2020 12:35:21 -0300 Subject: perf report/top TUI: Fix title line formatting In d10ec006dcd7 ("perf hists browser: Allow passing an initial hotkey") the hist_entry__title() call was cut'n'pasted to a function where the 'title' variable is a pointer, not an array, so the sizeof(title) continues syntactically valid but ends up reducing the real size of the buffer where to format the first line in the screen to 8 bytes, which makes the formatting at the title at each refresh to produce just the string "Samples ", duh, fix it by passing the size of the buffer. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Jin Yao <yao.jin@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Fixes: d10ec006dcd7 ("perf hists browser: Allow passing an initial hotkey") Link: http://lore.kernel.org/lkml/20200330154314.GB4576@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/ui/browsers/hists.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 95ac5e2ea949..487e54ef56a9 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -677,7 +677,7 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si return browser->title ? browser->title(browser, bf, size) : 0; } -static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, int key) +static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, size_t size, int key) { switch (key) { case K_TIMER: { @@ -703,7 +703,7 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l ui_browser__warn_lost_events(&browser->b); } - hist_browser__title(browser, title, sizeof(title)); + hist_browser__title(browser, title, size); ui_browser__show_title(&browser->b, title); break; } @@ -764,13 +764,13 @@ int hist_browser__run(struct hist_browser *browser, const char *help, if (ui_browser__show(&browser->b, title, "%s", help) < 0) return -1; - if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, key)) + if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key)) goto out; while (1) { key = ui_browser__run(&browser->b, delay_secs); - if (hist_browser__handle_hotkey(browser, warn_lost_event, title, key)) + if (hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key)) break; } out: -- cgit v1.2.3-70-g09d2 From d2bedb7863e9817f71fe2332a85ecdbd0f264b4b Mon Sep 17 00:00:00 2001 From: Stephane Eranian <eranian@google.com> Date: Wed, 25 Mar 2020 15:08:02 -0700 Subject: perf script: Allow --symbol to accept hexadecimal addresses This patch extends the perf script --symbols option to filter on hexadecimal addresses in addition to symbol names. This makes it easier to handle cases where symbols are aliased. With this patch, it is possible to mix and match symbols and hexadecimal addresses using the --symbols option. $ perf script --symbols=noploop,0x4007a0 Signed-off-by: Stephane Eranian <eranian@google.com> Reviewed-by: Ian Rogers <irogers@google.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200325220802.15039-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/event.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 824c038e5c33..dc0e11214ae1 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -617,10 +617,23 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->sym = map__find_symbol(al->map, al->addr); } - if (symbol_conf.sym_list && - (!al->sym || !strlist__has_entry(symbol_conf.sym_list, - al->sym->name))) { - al->filtered |= (1 << HIST_FILTER__SYMBOL); + if (symbol_conf.sym_list) { + int ret = 0; + char al_addr_str[32]; + size_t sz = sizeof(al_addr_str); + + if (al->sym) { + ret = strlist__has_entry(symbol_conf.sym_list, + al->sym->name); + } + if (!(ret && al->sym)) { + snprintf(al_addr_str, sz, "0x%"PRIx64, + al->map->unmap_ip(al->map, al->sym->start)); + ret = strlist__has_entry(symbol_conf.sym_list, + al_addr_str); + } + if (!ret) + al->filtered |= (1 << HIST_FILTER__SYMBOL); } return 0; -- cgit v1.2.3-70-g09d2 From 47327f56674d69a423f7167f8d4ea537cc1762cc Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Thu, 26 Mar 2020 10:01:47 +0200 Subject: perf events parser: Add missing Intel CPU events to parser perf list expects CPU events to be parseable by name, e.g. # perf list | grep el-capacity-read el-capacity-read OR cpu/el-capacity-read/ [Kernel PMU event] But the event parser does not recognize them that way, e.g. # perf test -v "Parse event" <SNIP> running test 54 'cycles//u' running test 55 'cycles:k' running test 0 'cpu/config=10,config1,config2=3,period=1000/u' running test 1 'cpu/config=1,name=krava/u,cpu/config=2/u' running test 2 'cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/' running test 3 'cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp' -> cpu/event=0,umask=0x11/ -> cpu/event=0,umask=0x13/ -> cpu/event=0x54,umask=0x1/ failed to parse event 'el-capacity-read:u,cpu/event=el-capacity-read/u', err 1, str 'parser error' event syntax error: 'el-capacity-read:u,cpu/event=el-capacity-read/u' \___ parser error test child finished with 1 ---- end ---- Parse event definition strings: FAILED! This happens because the parser splits names by '-' in order to deal with cache events. For example 'L1-dcache' is a token in parse-events.l which is matched to 'L1-dcache-load-miss' by the following rule: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config And so there is special handling for 2-part PMU names i.e. PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc but no handling for 3-part names, which are instead added as tokens e.g. topdown-[a-z-]+ While it would be possible to add a rule for 3-part names, that would not work if the first parts were also a valid PMU name e.g. 'el-capacity-read' would be matched to 'el-capacity' before the parser reached the 3rd part. The parser would need significant change to rationalize all this, so instead fix for now by adding missing Intel CPU events with 3-part names to the event parser as tokens. Missing events were found by using: grep -r EVENT_ATTR_STR arch/x86/events/intel/core.c Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@linux.intel.com> Link: http://lore.kernel.org/lkml/90c7ae07-c568-b6d3-f9c4-d0c1528a0610@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/parse-events.l | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7b1c8ee537cf..baa48f28d57d 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -342,11 +342,13 @@ bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUT * Because the prefix cycles is mixed up with cpu-cycles. * loads and stores are mixed up with cache event */ -cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } -topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } +cycles-ct | +cycles-t | +mem-loads | +mem-stores | +topdown-[a-z-]+ | +tx-capacity-[a-z-]+ | +el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } L1-dcache|l1-d|l1d|L1-data | L1-icache|l1-i|l1i|L1-instruction | -- cgit v1.2.3-70-g09d2 From 8ed1faf0156e569c46f1b3d59e74c9cbd5e557ff Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Mon, 9 Mar 2020 09:31:25 +0800 Subject: perf pmu-events x86: Use CPU_CLK_UNHALTED.THREAD in Kernel_Utilization metric The kernel utilization metric does multiplexing currently and is somewhat unreliable. The problem is that it uses two instances of the fixed counter, and the kernel has to multipleplex which causes errors. So should use CPU_CLK_UNHALTED.THREAD instead. Before: # perf stat -M Kernel_Utilization -- sleep 1 Performance counter stats for 'sleep 1': 1,419,425 cpu_clk_unhalted.ref_tsc:k <not counted> cpu_clk_unhalted.ref_tsc (0.00%) After: # perf stat -M Kernel_Utilization -- sleep 1 Performance counter stats for 'sleep 1': 746,688 cpu_clk_unhalted.thread:k # 0.7 Kernel_Utilization 1,088,348 cpu_clk_unhalted.thread Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> Reviewed-by: Kan Liang <kan.liang@linux.intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jin Yao <yao.jin@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200309013125.7559-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json index 45a34ce4fe89..8cdc7c13dc2a 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -297,7 +297,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json index 961fe4395758..16fd8a7490fc 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json @@ -115,7 +115,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json index 746734ce09be..1eb0415fa11a 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -297,7 +297,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index a728c6e5119b..7fde0d2943cd 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -316,7 +316,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json index 5402cd3120f9..f57c5f3506c2 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json @@ -267,7 +267,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json index 832f3cb40b34..311a005dc35b 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json @@ -267,7 +267,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json index d69b2a8fc0bc..28e25447d3ef 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json @@ -285,7 +285,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json index 5f465fd81315..db23db2e98be 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json @@ -285,7 +285,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json index 3e909b306003..dbb33e00b72a 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json @@ -171,7 +171,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json index 50c053235752..fb2d7b8875f8 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json @@ -171,7 +171,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index f97e8316ad2f..8704efeb8d31 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -304,7 +304,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 35f5db1786f7..b4f91137f40c 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -316,7 +316,7 @@ }, { "BriefDescription": "Fraction of cycles spent in Kernel mode", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, -- cgit v1.2.3-70-g09d2 From 628d736d913a490d587941e8f68392f2de17e37a Mon Sep 17 00:00:00 2001 From: Ian Rogers <irogers@google.com> Date: Thu, 2 Apr 2020 10:41:30 -0700 Subject: perf script: add -S/--symbols documentation Capture both that this option exists and that symbols can be hexadecimal addresses. Signed-off-by: Ian Rogers <irogers@google.com> Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lore.kernel.org/lkml/20200402174130.140319-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-script.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 99a9853a11ba..963487e82edc 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -412,6 +412,14 @@ include::itrace.txt[] --xed:: Run xed disassembler on output. Requires installing the xed disassembler. +-S:: +--symbols=symbol[,symbol...]:: + Only consider the listed symbols. Symbols are typically a name + but they may also be hexadecimal address. + + For example, to select the symbol noploop or the address 0x4007a0: + perf script --symbols=noploop,0x4007a0 + --call-trace:: Show call stream for intel_pt traces. The CPUs are interleaved, but can be filtered with -C. -- cgit v1.2.3-70-g09d2 From 1a4025f06059eeaecb2ef24363350ea3431568df Mon Sep 17 00:00:00 2001 From: Andreas Gerstmayr <agerstmayr@redhat.com> Date: Thu, 2 Apr 2020 14:54:16 +0200 Subject: perf script report: Fix SEGFAULT when using DWARF mode When running perf script report with a Python script and a callgraph in DWARF mode, intr_regs->regs can be 0 and therefore crashing the regs_map function. Added a check for this condition (same check as in builtin-script.c:595). Signed-off-by: Andreas Gerstmayr <agerstmayr@redhat.com> Tested-by: Kim Phillips <kim.phillips@amd.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Steven Rostedt (VMware) <rostedt@goodmis.org> Link: http://lore.kernel.org/lkml/20200402125417.422232-1-agerstmayr@redhat.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/scripting-engines/trace-event-python.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 8c1b27cd8b99..2c372cf5495e 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -694,6 +694,9 @@ static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) bf[0] = 0; + if (!regs || !regs->regs) + return 0; + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; -- cgit v1.2.3-70-g09d2 From 27486a85cb65bd258a9a213b3e95bdf8c24fd781 Mon Sep 17 00:00:00 2001 From: Andreas Gerstmayr <agerstmayr@redhat.com> Date: Thu, 2 Apr 2020 14:43:38 +0200 Subject: perf script: Fix invalid read of directory entry after closedir() closedir(lang_dir) frees the memory of script_dirent->d_name, which gets accessed in the next line in a call to scnprintf(). Valgrind report: Invalid read of size 1 ==413557== at 0x483CBE6: strlen (vg_replace_strmem.c:461) ==413557== by 0x4DD45FD: __vfprintf_internal (vfprintf-internal.c:1688) ==413557== by 0x4DE6679: __vsnprintf_internal (vsnprintf.c:114) ==413557== by 0x53A037: vsnprintf (stdio2.h:80) ==413557== by 0x53A037: scnprintf (vsprintf.c:21) ==413557== by 0x435202: get_script_path (builtin-script.c:3223) ==413557== Address 0x52e7313 is 1,139 bytes inside a block of size 32,816 free'd ==413557== at 0x483AA0C: free (vg_replace_malloc.c:540) ==413557== by 0x4E303C0: closedir (closedir.c:50) ==413557== by 0x4351DC: get_script_path (builtin-script.c:3222) Signed-off-by: Andreas Gerstmayr <agerstmayr@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20200402124337.419456-1-agerstmayr@redhat.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 186ebf827fa1..1f57a7ecdf3d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3265,10 +3265,10 @@ static char *get_script_path(const char *script_root, const char *suffix) __script_root = get_script_root(script_dirent, suffix); if (__script_root && !strcmp(script_root, __script_root)) { free(__script_root); - closedir(lang_dir); closedir(scripts_dir); scnprintf(script_path, MAXPATHLEN, "%s/%s", lang_path, script_dirent->d_name); + closedir(lang_dir); return strdup(script_path); } free(__script_root); -- cgit v1.2.3-70-g09d2 From b9c9ce4e598e012ca7c1813fae2f4d02395807de Mon Sep 17 00:00:00 2001 From: Sam Lunt <samueljlunt@gmail.com> Date: Fri, 31 Jan 2020 12:11:23 -0600 Subject: perf tools: Support Python 3.8+ in Makefile Python 3.8 changed the output of 'python-config --ldflags' to no longer include the '-lpythonX.Y' flag (this apparently fixed an issue loading modules with a statically linked Python executable). The libpython feature check in linux/build/feature fails if the Python library is not included in FEATURE_CHECK_LDFLAGS-libpython variable. This adds a check in the Makefile to determine if PYTHON_CONFIG accepts the '--embed' flag and passes that flag alongside '--ldflags' if so. tools/perf is the only place the libpython feature check is used. Signed-off-by: Sam Lunt <samuel.j.lunt@gmail.com> Tested-by: He Zhe <zhe.he@windriver.com> Link: http://lore.kernel.org/lkml/c56be2e1-8111-9dfe-8298-f7d0f9ab7431@windriver.com Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: trivial@kernel.org Cc: stable@kernel.org Link: http://lore.kernel.org/lkml/20200131181123.tmamivhq4b7uqasr@gmail.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Makefile.config | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eb95c0c0a169..12a8204d63c6 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -228,8 +228,17 @@ strip-libs = $(filter-out -l%,$(1)) PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +# Python 3.8 changed the output of `python-config --ldflags` to not include the +# '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for +# libpython fails if that flag is not included in LDFLAGS +ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0) + PYTHON_CONFIG_LDFLAGS := --ldflags --embed +else + PYTHON_CONFIG_LDFLAGS := --ldflags +endif + ifdef PYTHON_CONFIG - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) + PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) -- cgit v1.2.3-70-g09d2 From 9ff76cea4e9e6d49a6f764ae114fc0fb8de97816 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Wed, 1 Apr 2020 09:33:59 -0300 Subject: perf python: Fix clang detection to strip out options passed in $CC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clang check in the python setup.py file expected $CC to be just the name of the compiler, not the compiler + options, i.e. all options were expected to be passed in $CFLAGS, this ends up making it fail in systems where CC is set to, e.g.: "aarch64-linaro-linux-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot" Like this: $ python3 >>> from subprocess import Popen >>> a = Popen(["aarch64-linux-gnu-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot", "-v"]) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/lib/python3.6/subprocess.py", line 729, in __init__ restore_signals, start_new_session) File "/usr/lib/python3.6/subprocess.py", line 1364, in _execute_child raise child_exception_type(errno_num, err_msg, err_filename) FileNotFoundError: [Errno 2] No such file or directory: 'aarch64-linux-gnu-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot': 'aarch64-linux-gnu-gcc --sysroot=/oe/build/tmp/work/juno-linaro-linux/perf/1.0-r9/recipe-sysroot' >>> Make it more robust, covering this case, by passing cc.split()[0] as the first arg to popen(). Fixes: a7ffd416d804 ("perf python: Fix clang detection when using CC=clang-version") Reported-by: Daniel Díaz <daniel.diaz@linaro.org> Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org> Tested-by: Daniel Díaz <daniel.diaz@linaro.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ilie Halip <ilie.halip@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Link: http://lore.kernel.org/lkml/20200401124037.GA12534@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 8a065a6f9713..347b2c0789e4 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -3,7 +3,7 @@ from subprocess import Popen, PIPE from re import sub cc = getenv("CC") -cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() +cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline() def clang_has_option(option): return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] -- cgit v1.2.3-70-g09d2