summaryrefslogtreecommitdiff
path: root/tools/perf/util/stat-display.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-18 06:32:11 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-18 06:32:11 +0200
commit57d17378a4a042401b0c2fe211e5a0e3a276cb3d (patch)
treebe65e51a7b11cccb903b44708b98b3af47477304 /tools/perf/util/stat-display.c
parentf0033681f0fe8421baf8db125e57fa6157824c2d (diff)
parent9bce13ea88f85344b765abe5d3dabdd0f44dc177 (diff)
Merge tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo: "New features: - Add 'trace' subcommand for 'perf ftrace', setting the stage for more 'perf ftrace' subcommands. Not using a subcommand yields the previous behaviour of 'perf ftrace'. - Add 'latency' subcommand to 'perf ftrace', that can use the function graph tracer or a BPF optimized one, via the -b/--use-bpf option. E.g.: $ sudo perf ftrace latency -a -T mutex_lock sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 4596 | ######################## | 1 - 2 us | 1680 | ######### | 2 - 4 us | 1106 | ##### | 4 - 8 us | 546 | ## | 8 - 16 us | 562 | ### | 16 - 32 us | 1 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | The original implementation of this command was in the bcc tool. - Support --cputype option for hybrid events in 'perf stat'. Improvements: - Call chain improvements for ARM64. - No need to do any affinity setup when profiling pids. - Reduce multiplexing with duration_time in 'perf stat' metrics. - Improve error message for uncore events, stating that some event groups are can only be used in system wide (-a) mode. - perf stat metric group leader fixes/improvements, including arch specific changes to better support Intel topdown events. - Probe non-deprecated sysfs path first, i.e. try the path /sys/devices/system/cpu/cpuN/topology/thread_siblings first, then the old /sys/devices/system/cpu/cpuN/topology/core_cpus. - Disable debuginfod by default in 'perf record', to avoid stalls on distros such as Fedora 35. - Use unbuffered output in 'perf bench' when pipe/tee'ing to a file. - Enable ignore_missing_thread in 'perf trace' Fixes: - Avoid TUI crash when navigating in the annotation of recursive functions. - Fix hex dump character output in 'perf script'. - Fix JSON indentation to 4 spaces standard in the ARM vendor event files. - Fix use after free in metric__new(). - Fix IS_ERR_OR_NULL() usage in the perf BPF loader. - Fix up cross-arch register support, i.e. when printing register names take into account the architecture where the perf.data file was collected. - Fix SMT fallback with large core counts. - Don't lower case MetricExpr when parsing JSON files so as not to lose info such as the ":G" event modifier in metrics. perf test: - Add basic stress test for sigtrap handling to 'perf test'. - Fix 'perf test' failures on s/390 - Enable system wide for metricgroups test in 'perf test´. - Use 3 digits for test numbering now we can have more tests. Arch specific: - Add events for Arm Neoverse N2 in the ARM JSON vendor event files - Support PERF_MEM_LVLNUM encodings in powerpc, that came from a single patch series, where I incorrectly merged the kernel bits, that were then reverted after coordination with Michael Ellerman and Stephen Rothwell. - Add ARM SPE total latency as PERF_SAMPLE_WEIGHT. - Update AMD documentation, with info on raw event encoding. - Add support for global and local variants of the "p_stage_cyc" sort key, applicable to perf.data files collected on powerpc. - Remove duplicate and incorrect aux size checks in the ARM CoreSight ETM code. Refactorings: - Add a perf_cpu abstraction to disambiguate CPUs and CPU map indexes, fixing problems along the way. - Document CPU map methods. UAPI sync: - Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' - Sync UAPI files with the kernel sources: drm, msr-index, cpufeatures. Build system - Enable warnings through HOSTCFLAGS. - Drop requirement for libstdc++.so for libopencsd check libperf: - Make libperf adopt perf_counts_values__scale() from tools/perf/util/. - Add a stat multiplexing test to libperf" * tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (115 commits) perf record: Disable debuginfod by default perf evlist: No need to do any affinity setup when profiling pids perf cpumap: Add is_dummy() method perf metric: Fix metric_leader perf cputopo: Fix CPU topology reading on s/390 perf metricgroup: Fix use after free in metric__new() libperf tests: Update a use of the new cpumap API perf arm: Fix off-by-one directory path tools arch x86: Sync the msr-index.h copy with the kernel sources tools headers cpufeatures: Sync with the kernel sources tools headers UAPI: Update tools's copy of drm.h header tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' perf pmu-events: Don't lower case MetricExpr perf expr: Add debug logging for literals perf tools: Probe non-deprecated sysfs path 1st perf tools: Fix SMT fallback with large core counts perf cpumap: Give CPUs their own type perf stat: Correct first_shadow_cpu to return index perf script: Fix flipped index and cpu perf c2c: Use more intention revealing iterator ...
Diffstat (limited to 'tools/perf/util/stat-display.c')
-rw-r--r--tools/perf/util/stat-display.c138
1 files changed, 74 insertions, 64 deletions
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 588601000f3f..5db83e51ceef 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/time64.h>
#include <math.h>
+#include <perf/cpumap.h>
#include "color.h"
#include "counts.h"
#include "evlist.h"
@@ -120,11 +121,10 @@ static void aggr_printout(struct perf_stat_config *config,
id.die,
config->csv_output ? 0 : -3,
id.core, config->csv_sep);
- } else if (id.core > -1) {
+ } else if (id.cpu.cpu > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
- evsel__cpus(evsel)->map[id.core],
- config->csv_sep);
+ id.cpu.cpu, config->csv_sep);
}
break;
case AGGR_THREAD:
@@ -327,26 +327,24 @@ static void print_metric_header(struct perf_stat_config *config,
fprintf(os->fh, "%*s ", config->metric_only_len, unit);
}
-static int first_shadow_cpu(struct perf_stat_config *config,
- struct evsel *evsel, struct aggr_cpu_id id)
+static int first_shadow_cpu_map_idx(struct perf_stat_config *config,
+ struct evsel *evsel, const struct aggr_cpu_id *id)
{
- struct evlist *evlist = evsel->evlist;
- int i;
+ struct perf_cpu_map *cpus = evsel__cpus(evsel);
+ struct perf_cpu cpu;
+ int idx;
if (config->aggr_mode == AGGR_NONE)
- return id.core;
+ return perf_cpu_map__idx(cpus, id->cpu);
if (!config->aggr_get_id)
return 0;
- for (i = 0; i < evsel__nr_cpus(evsel); i++) {
- int cpu2 = evsel__cpus(evsel)->map[i];
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
- if (cpu_map__compare_aggr_cpu_id(
- config->aggr_get_id(config, evlist->core.cpus, cpu2),
- id)) {
- return cpu2;
- }
+ if (aggr_cpu_id__equal(&cpu_id, id))
+ return idx;
}
return 0;
}
@@ -505,7 +503,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
}
perf_stat__print_shadow_stats(config, counter, uval,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&out, &config->metric_events, st);
if (!config->csv_output && !config->metric_only) {
print_noise(config, counter, noise);
@@ -516,23 +514,26 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
static void aggr_update_shadow(struct perf_stat_config *config,
struct evlist *evlist)
{
- int cpu, s;
+ int idx, s;
+ struct perf_cpu cpu;
struct aggr_cpu_id s2, id;
u64 val;
struct evsel *counter;
+ struct perf_cpu_map *cpus;
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
evlist__for_each_entry(evlist, counter) {
+ cpus = evsel__cpus(counter);
val = 0;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, id))
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &id))
continue;
- val += perf_counts(counter->counts, cpu, 0)->val;
+ val += perf_counts(counter->counts, idx, 0)->val;
}
perf_stat__update_shadow_stats(counter, val,
- first_shadow_cpu(config, counter, id),
+ first_shadow_cpu_map_idx(config, counter, &id),
&rt_stat);
}
}
@@ -627,25 +628,28 @@ struct aggr_data {
u64 ena, run, val;
struct aggr_cpu_id id;
int nr;
- int cpu;
+ int cpu_map_idx;
};
static void aggr_cb(struct perf_stat_config *config,
struct evsel *counter, void *data, bool first)
{
struct aggr_data *ad = data;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
+ struct perf_cpu_map *cpus;
struct aggr_cpu_id s2;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
struct perf_counts_values *counts;
- s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
- if (!cpu_map__compare_aggr_cpu_id(s2, ad->id))
+ s2 = config->aggr_get_id(config, cpu);
+ if (!aggr_cpu_id__equal(&s2, &ad->id))
continue;
if (first)
ad->nr++;
- counts = perf_counts(counter->counts, cpu, 0);
+ counts = perf_counts(counter->counts, idx, 0);
/*
* When any result is bad, make them all to give
* consistent output in interval mode.
@@ -665,7 +669,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first, int cpu)
+ bool *first, struct perf_cpu cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -695,10 +699,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- if (cpu != -1) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
- }
+ if (cpu.cpu != -1)
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+
printout(config, id, nr, counter, uval,
prefix, run, ena, 1.0, &rt_stat);
if (!metric_only)
@@ -731,8 +734,8 @@ static void print_aggr(struct perf_stat_config *config,
first = true;
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)
fputc('\n', output);
@@ -778,7 +781,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
continue;
buf[i].counter = counter;
- buf[i].id = cpu_map__empty_aggr_cpu_id();
+ buf[i].id = aggr_cpu_id__empty();
buf[i].id.thread = thread;
buf[i].uval = uval;
buf[i].val = val;
@@ -866,7 +869,7 @@ static void print_counter_aggr(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = cd.avg * counter->scale;
- printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running,
+ printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running,
cd.avg_enabled, cd.avg, &rt_stat);
if (!metric_only)
fprintf(output, "\n");
@@ -878,9 +881,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused,
{
struct aggr_data *ad = data;
- ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
- ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
- ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+ ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val;
+ ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena;
+ ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run;
}
/*
@@ -893,11 +896,12 @@ static void print_counter(struct perf_stat_config *config,
FILE *output = config->output;
u64 ena, run, val;
double uval;
- int cpu;
+ int idx;
+ struct perf_cpu cpu;
struct aggr_cpu_id id;
- for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
- struct aggr_data ad = { .cpu = cpu };
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
+ struct aggr_data ad = { .cpu_map_idx = idx };
if (!collect_data(config, counter, counter_cb, &ad))
return;
@@ -909,8 +913,7 @@ static void print_counter(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
printout(config, id, 0, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
@@ -922,29 +925,32 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
struct evlist *evlist,
char *prefix)
{
- int cpu;
- int nrcpus = 0;
- struct evsel *counter;
- u64 ena, run, val;
- double uval;
- struct aggr_cpu_id id;
+ int all_idx;
+ struct perf_cpu cpu;
- nrcpus = evlist->core.cpus->nr;
- for (cpu = 0; cpu < nrcpus; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) {
+ struct evsel *counter;
bool first = true;
if (prefix)
fputs(prefix, config->output);
evlist__for_each_entry(evlist, counter) {
- id = cpu_map__empty_aggr_cpu_id();
- id.core = cpu;
+ u64 ena, run, val;
+ double uval;
+ struct aggr_cpu_id id;
+ int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+
+ if (counter_idx < 0)
+ continue;
+
+ id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
if (first) {
aggr_printout(config, counter, id, 0);
first = false;
}
- val = perf_counts(counter->counts, cpu, 0)->val;
- ena = perf_counts(counter->counts, cpu, 0)->ena;
- run = perf_counts(counter->counts, cpu, 0)->run;
+ val = perf_counts(counter->counts, counter_idx, 0)->val;
+ ena = perf_counts(counter->counts, counter_idx, 0)->ena;
+ run = perf_counts(counter->counts, counter_idx, 0)->run;
uval = val * counter->scale;
printout(config, id, 0, counter, uval, prefix,
@@ -1208,19 +1214,23 @@ static void print_percore_thread(struct perf_stat_config *config,
{
int s;
struct aggr_cpu_id s2, id;
+ struct perf_cpu_map *cpus;
bool first = true;
+ int idx;
+ struct perf_cpu cpu;
- for (int i = 0; i < evsel__nr_cpus(counter); i++) {
- s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ cpus = evsel__cpus(counter);
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ s2 = config->aggr_get_id(config, cpu);
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
- if (cpu_map__compare_aggr_cpu_id(s2, id))
+ if (aggr_cpu_id__equal(&s2, &id))
break;
}
print_counter_aggrdata(config, counter, s,
prefix, false,
- &first, i);
+ &first, cpu);
}
}
@@ -1243,8 +1253,8 @@ static void print_percore(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, -1);
+ prefix, metric_only,
+ &first, (struct perf_cpu){ .cpu = -1 });
}
if (metric_only)