summaryrefslogtreecommitdiff
path: root/tools/bpf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-06 09:36:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-06 09:36:08 -0700
commit48a577dc1b09c1d35f2b8b37e7fa9a7169d50f5d (patch)
treebdb0ea12938bce19b48fe304c37be24b55e67ebe /tools/bpf
parent033a94412b6065a21c2ede2f37867e747a84563f (diff)
parentbb8bc52e75785af94b9ba079277547d50d018a52 (diff)
Merge tag 'perf-tools-for-v6.0-2022-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: - Introduce 'perf lock contention' subtool, using new lock contention tracepoints and using BPF for in kernel aggregation and then userspace processing using the perf tooling infrastructure for resolving symbols, target specification, etc. Since the new lock contention tracepoints don't provide lock names, get up to 8 stack traces and display the first non-lock function symbol name as a caller: $ perf lock report -F acquired,contended,avg_wait,wait_total Name acquired contended avg wait total wait update_blocked_a... 40 40 3.61 us 144.45 us kernfs_fop_open+... 5 5 3.64 us 18.18 us _nohz_idle_balance 3 3 2.65 us 7.95 us tick_do_update_j... 1 1 6.04 us 6.04 us ep_scan_ready_list 1 1 3.93 us 3.93 us Supports the usual 'perf record' + 'perf report' workflow as well as a BCC/bpftrace like mode where you start the tool and then press control+C to get results: $ sudo perf lock contention -b ^C contended total wait max wait avg wait type caller 42 192.67 us 13.64 us 4.59 us spinlock queue_work_on+0x20 23 85.54 us 10.28 us 3.72 us spinlock worker_thread+0x14a 6 13.92 us 6.51 us 2.32 us mutex kernfs_iop_permission+0x30 3 11.59 us 10.04 us 3.86 us mutex kernfs_dop_revalidate+0x3c 1 7.52 us 7.52 us 7.52 us spinlock kthread+0x115 1 7.24 us 7.24 us 7.24 us rwlock:W sys_epoll_wait+0x148 2 7.08 us 3.99 us 3.54 us spinlock delayed_work_timer_fn+0x1b 1 6.41 us 6.41 us 6.41 us spinlock idle_balance+0xa06 2 2.50 us 1.83 us 1.25 us mutex kernfs_iop_lookup+0x2f 1 1.71 us 1.71 us 1.71 us mutex kernfs_iop_getattr+0x2c ... - Add new 'perf kwork' tool to trace time properties of kernel work (such as softirq, and workqueue), uses eBPF skeletons to collect info in kernel space, aggregating data that then gets processed by the userspace tool, e.g.: # perf kwork report Kwork Name | Cpu | Total Runtime | Count | Max runtime | Max runtime start | Max runtime end | ---------------------------------------------------------------------------------------------------- nvme0q5:130 | 004 | 1.101 ms | 49 | 0.051 ms | 26035.056403 s | 26035.056455 s | amdgpu:162 | 002 | 0.176 ms | 9 | 0.046 ms | 26035.268020 s | 26035.268066 s | nvme0q24:149 | 023 | 0.161 ms | 55 | 0.009 ms | 26035.655280 s | 26035.655288 s | nvme0q20:145 | 019 | 0.090 ms | 33 | 0.014 ms | 26035.939018 s | 26035.939032 s | nvme0q31:156 | 030 | 0.075 ms | 21 | 0.010 ms | 26035.052237 s | 26035.052247 s | nvme0q8:133 | 007 | 0.062 ms | 12 | 0.021 ms | 26035.416840 s | 26035.416861 s | nvme0q6:131 | 005 | 0.054 ms | 22 | 0.010 ms | 26035.199919 s | 26035.199929 s | nvme0q19:144 | 018 | 0.052 ms | 14 | 0.010 ms | 26035.110615 s | 26035.110625 s | nvme0q7:132 | 006 | 0.049 ms | 13 | 0.007 ms | 26035.125180 s | 26035.125187 s | nvme0q18:143 | 017 | 0.033 ms | 14 | 0.007 ms | 26035.169698 s | 26035.169705 s | nvme0q17:142 | 016 | 0.013 ms | 1 | 0.013 ms | 26035.565147 s | 26035.565160 s | enp5s0-rx-0:164 | 006 | 0.004 ms | 4 | 0.002 ms | 26035.928882 s | 26035.928884 s | enp5s0-tx-0:166 | 008 | 0.003 ms | 3 | 0.002 ms | 26035.870923 s | 26035.870925 s | -------------------------------------------------------------------------------------------------------- See commit log messages for more examples with extra options to limit the events time window, etc. - Add support for new AMD IBS (Instruction Based Sampling) features: With the DataSrc extensions, the source of data can be decoded among: - Local L3 or other L1/L2 in CCX. - A peer cache in a near CCX. - Data returned from DRAM. - A peer cache in a far CCX. - DRAM address map with "long latency" bit set. - Data returned from MMIO/Config/PCI/APIC. - Extension Memory (S-Link, GenZ, etc - identified by the CS target and/or address map at DF's choice). - Peer Agent Memory. - Support hardware tracing with Intel PT on guest machines, combining the traces with the ones in the host machine. - Add a "-m" option to 'perf buildid-list' to show kernel and modules build-ids, to display all of the information needed to do external symbolization of kernel stack traces, such as those collected by bpf_get_stackid(). - Add arch TSC frequency information to perf.data file headers. - Handle changes in the binutils disassembler function signatures in perf, bpftool and bpf_jit_disasm (Acked by the bpftool maintainer). - Fix building the perf perl binding with the newest gcc in distros such as fedora rawhide, where some new warnings were breaking the build as perf uses -Werror. - Add 'perf test' entry for branch stack sampling. - Add ARM SPE system wide 'perf test' entry. - Add user space counter reading tests to 'perf test'. - Build with python3 by default, if available. - Add python converter script for the vendor JSON event files. - Update vendor JSON files for most Intel cores. - Add vendor JSON File for Intel meteorlake. - Add Arm Cortex-A78C and X1C JSON vendor event files. - Add workaround to symbol address reading from ELF files without phdr, falling back to the previoous equation. - Convert legacy map definition to BTF-defined in the perf BPF script test. - Rework prologue generation code to stop using libbpf deprecated APIs. - Add default hybrid events for 'perf stat' on x86. - Add topdown metrics in the default 'perf stat' on the hybrid machines (big/little cores). - Prefer sampled CPU when exporting JSON in 'perf data convert' - Fix ('perf stat CSV output linter') and ("Check branch stack sampling") 'perf test' entries on s390. * tag 'perf-tools-for-v6.0-2022-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (169 commits) perf stat: Refactor __run_perf_stat() common code perf lock: Print the number of lost entries for BPF perf lock: Add --map-nr-entries option perf lock: Introduce struct lock_contention perf scripting python: Do not build fail on deprecation warnings genelf: Use HAVE_LIBCRYPTO_SUPPORT, not the never defined HAVE_LIBCRYPTO perf build: Suppress openssl v3 deprecation warnings in libcrypto feature test perf parse-events: Break out tracepoint and printing perf parse-events: Don't #define YY_EXTRA_TYPE tools bpftool: Don't display disassembler-four-args feature test tools bpftool: Fix compilation error with new binutils tools bpf_jit_disasm: Don't display disassembler-four-args feature test tools bpf_jit_disasm: Fix compilation error with new binutils tools perf: Fix compilation error with new binutils tools include: add dis-asm-compat.h to handle version differences tools build: Don't display disassembler-four-args feature test tools build: Add feature test for init_disassemble_info API changes perf test: Add ARM SPE system wide test perf tools: Rework prologue generation code perf bpf: Convert legacy map definition to BTF-defined ...
Diffstat (limited to 'tools/bpf')
-rw-r--r--tools/bpf/Makefile7
-rw-r--r--tools/bpf/bpf_jit_disasm.c5
-rw-r--r--tools/bpf/bpftool/Makefile8
-rw-r--r--tools/bpf/bpftool/jit_disasm.c42
4 files changed, 49 insertions, 13 deletions
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index b11cfc86a3d0..243b79f2b451 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -34,8 +34,8 @@ else
endif
FEATURE_USER = .bpf
-FEATURE_TESTS = libbfd disassembler-four-args
-FEATURE_DISPLAY = libbfd disassembler-four-args
+FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
+FEATURE_DISPLAY = libbfd
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
@@ -56,6 +56,9 @@ endif
ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
+ifeq ($(feature-disassembler-init-styled), 1)
+CFLAGS += -DDISASM_INIT_STYLED
+endif
$(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
$(QUIET_BISON)$(YACC) -o $@ -d $<
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index c8ae95804728..a90a5d110f92 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -28,6 +28,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <limits.h>
+#include <tools/dis-asm-compat.h>
#define CMD_ACTION_SIZE_BUFFER 10
#define CMD_ACTION_READ_ALL 3
@@ -64,7 +65,9 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
assert(bfdf);
assert(bfd_check_format(bfdf, bfd_object));
- init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
+ init_disassemble_info_compat(&info, stdout,
+ (fprintf_ftype) fprintf,
+ fprintf_styled);
info.arch = bfd_get_arch(bfdf);
info.mach = bfd_get_mach(bfdf);
info.buffer = image;
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 6b5b3a99f79d..04d733e98bff 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -93,8 +93,9 @@ INSTALL ?= install
RM ?= rm -f
FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args libcap clang-bpf-co-re
-FEATURE_DISPLAY = libbfd disassembler-four-args libcap clang-bpf-co-re
+FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled libcap \
+ clang-bpf-co-re
+FEATURE_DISPLAY = libbfd libcap clang-bpf-co-re
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -115,6 +116,9 @@ endif
ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
+ifeq ($(feature-disassembler-init-styled), 1)
+ CFLAGS += -DDISASM_INIT_STYLED
+endif
LIBS = $(LIBBPF) -lelf -lz
LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 24734f2249d6..aaf99a0168c9 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -24,6 +24,7 @@
#include <sys/stat.h>
#include <limits.h>
#include <bpf/libbpf.h>
+#include <tools/dis-asm-compat.h>
#include "json_writer.h"
#include "main.h"
@@ -39,15 +40,12 @@ static void get_exec_path(char *tpath, size_t size)
}
static int oper_count;
-static int fprintf_json(void *out, const char *fmt, ...)
+static int printf_json(void *out, const char *fmt, va_list ap)
{
- va_list ap;
char *s;
int err;
- va_start(ap, fmt);
err = vasprintf(&s, fmt, ap);
- va_end(ap);
if (err < 0)
return -1;
@@ -73,6 +71,32 @@ static int fprintf_json(void *out, const char *fmt, ...)
return 0;
}
+static int fprintf_json(void *out, const char *fmt, ...)
+{
+ va_list ap;
+ int r;
+
+ va_start(ap, fmt);
+ r = printf_json(out, fmt, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static int fprintf_json_styled(void *out,
+ enum disassembler_style style __maybe_unused,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int r;
+
+ va_start(ap, fmt);
+ r = printf_json(out, fmt, ap);
+ va_end(ap);
+
+ return r;
+}
+
void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
const char *arch, const char *disassembler_options,
const struct btf *btf,
@@ -99,11 +123,13 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
assert(bfd_check_format(bfdf, bfd_object));
if (json_output)
- init_disassemble_info(&info, stdout,
- (fprintf_ftype) fprintf_json);
+ init_disassemble_info_compat(&info, stdout,
+ (fprintf_ftype) fprintf_json,
+ fprintf_json_styled);
else
- init_disassemble_info(&info, stdout,
- (fprintf_ftype) fprintf);
+ init_disassemble_info_compat(&info, stdout,
+ (fprintf_ftype) fprintf,
+ fprintf_styled);
/* Update architecture info for offload. */
if (arch) {