diff options
author | Sandipan Das <sandipan.das@amd.com> | 2023-02-02 17:56:15 +0530 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2023-02-02 17:18:31 -0300 |
commit | 8eaf8ec3c09b88e35c1c3c761ac4188ee425aeb6 (patch) | |
tree | 9edf77b02bbe3fdb19d1bec268aeac6d89853e13 /tools/perf | |
parent | 6ade6c6460357a4878db24f468bbc66e3eddcd42 (diff) |
perf session: Show branch speculation info in raw dump
Show the branch speculation info if provided by the branch recording
hardware feature. This can be useful for purposes of code optimization.
E.g.
$ perf record -j any,u ./test_branch
$ perf report --dump-raw-trace
Before:
[...]
8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0
... branch stack: nr:16
..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0
..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0
..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0
..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0
..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0
..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0
..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0
..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0
..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0
..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0
..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0
..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0
..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0
..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0
..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0
..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0
... thread: test_branch:7952
...... dso: /data/sandipan/test_branch
[...]
After:
[...]
8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0
... branch stack: nr:16
..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 SPEC_CORRECT_PATH
..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 NON_SPEC_CORRECT_PATH
..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 NON_SPEC_CORRECT_PATH
..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 NON_SPEC_CORRECT_PATH
... thread: test_branch:7952
...... dso: /data/sandipan/test_branch
[...]
With the addition of new branch flags, the "brstacksym" fields in perf
script output now shows speculation information after the branch type.
Change the regular expressions accordingly for the test to pass. Since
branch speculation information may vary across platforms, the test does
not look for specific values.
E.g.
$ perf test -v 110
Before:
110: Check branch stack sampling :
--- start ---
test child forked, pid 54154
Testing user branch stack sampling
+ grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$ /tmp/__perf_test.program.AfhUI/perf.script
+ cleanup
+ rm -rf /tmp/__perf_test.program.AfhUI
test child finished with -1
---- end ----
Check branch stack sampling: FAILED!
After:
110: Check branch stack sampling :
--- start ---
test child forked, pid 43716
Testing user branch stack sampling
+ grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack_bench+0x66/brstack_foo+0x0/P/-/-/0/IND_CALL/NON_SPEC_CORRECT_PATH
+ grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack_foo+0x1b/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH
+ grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack_bench+0x58/brstack_foo+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH
+ grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack_bench+0x5d/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH
+ grep -E -m1 ^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack_bar+0x31/brstack_foo+0x20/P/-/-/0/RET/NON_SPEC_CORRECT_PATH
+ grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack_foo+0x36/brstack_bench+0x5d/P/-/-/0/RET/NON_SPEC_CORRECT_PATH
+ grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack_bench+0x76/brstack_bench+0x7d/P/-/-/0/COND/NON_SPEC_CORRECT_PATH
+ grep -E -m1 ^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$ /tmp/__perf_test.program.xgzAi/perf.script
brstack+0x5a/brstack+0x41/P/-/-/0/UNCOND/NON_SPEC_CORRECT_PATH
+ set +x
Testing branch stack filtering permutation (any_call,CALL|IND_CALL|COND_CALL|SYSCALL|IRQ)
Testing branch stack filtering permutation (call,CALL|SYSCALL)
Testing branch stack filtering permutation (cond,COND)
Testing branch stack filtering permutation (any_ret,RET|COND_RET|SYSRET|ERET)
Testing branch stack filtering permutation (call,cond,CALL|SYSCALL|COND)
Testing branch stack filtering permutation (any_call,cond,CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND)
Testing branch stack filtering permutation (cond,any_call,any_ret,COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET)
test child finished with 0
---- end ----
Check branch stack sampling: Ok
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: x86@kernel.org
Link: https://lore.kernel.org/r/048d67c9de3cc8e3dbf19aaa7ff718dec91364c5.1675333809.git.sandipan.das@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rwxr-xr-x | tools/perf/tests/shell/test_brstack.sh | 18 | ||||
-rw-r--r-- | tools/perf/util/session.c | 5 |
2 files changed, 12 insertions, 11 deletions
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 59195eb80052..1c49d8293003 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -30,14 +30,14 @@ test_user_branches() { # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL set -x - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$" $TMPDIR/perf.script - grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$" $TMPDIR/perf.script - grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" $TMPDIR/perf.script set +x # some branch types are still not being tested: @@ -57,7 +57,7 @@ test_filter() { # fail if we find any branch type that doesn't match any of the expected ones # also consider UNKNOWN branch types (-) - if grep -E -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then + if grep -E -vm1 "^[^ ]*/($expect|-|( *))/.*$" $TMPDIR/perf.script; then return 1 fi } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fdfe772f2699..749d5b5c135b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1180,7 +1180,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) struct branch_entry *e = &entries[i]; if (!callstack) { - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n", + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n", i, e->from, e->to, (unsigned short)e->flags.cycles, e->flags.mispred ? "M" : " ", @@ -1188,7 +1188,8 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) e->flags.abort ? "A" : " ", e->flags.in_tx ? "T" : " ", (unsigned)e->flags.reserved, - get_branch_type(e)); + get_branch_type(e), + e->flags.spec ? branch_spec_desc(e->flags.spec) : ""); } else { if (i == 0) { printf("..... %2"PRIu64": %016" PRIx64 "\n" |