From 0bf02a0d80427f263195c1e5a4c8ada14bd5d261 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:09 +0900 Subject: perf bench: Add build-id injection benchmark Sometimes I can see that 'perf record' piped with 'perf inject' take a long time processing build-ids. So introduce a inject-build-id benchmark to the internals benchmark suite to measure its overhead regularly. It runs the 'perf inject' command internally and feeds the given number of synthesized events (MMAP2 + SAMPLE basically). Usage: perf bench internals inject-build-id -i, --iterations Number of iterations used to compute average (default: 100) -m, --nr-mmaps Number of mmap events for each iteration (default: 100) -n, --nr-samples Number of sample events per mmap event (default: 100) -v, --verbose be more verbose (show iteration count, DSO name, etc) By default, it measures average processing time of 100 MMAP2 events and 10000 SAMPLE events. Below is a result on my laptop. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 25.789 msec (+- 0.202 msec) Average time per event: 2.528 usec (+- 0.020 usec) Average memory usage: 8411 KB (+- 7 KB) Committer testing: $ perf bench Usage: perf bench [] [] # List of all available benchmark collections: sched: Scheduler and IPC benchmarks syscall: System call benchmarks mem: Memory access benchmarks numa: NUMA scheduling and MM benchmarks futex: Futex stressing benchmarks epoll: Epoll stressing benchmarks internals: Perf-internals benchmarks all: All benchmarks $ perf bench internals # List of available benchmarks for collection 'internals': synthesize: Benchmark perf event synthesis kallsyms-parse: Benchmark kallsyms parsing inject-build-id: Benchmark build-id injection $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.202 msec (+- 0.059 msec) Average time per event: 1.392 usec (+- 0.006 usec) Average memory usage: 12650 KB (+- 10 KB) Average build-id-all injection took: 12.831 msec (+- 0.071 msec) Average time per event: 1.258 usec (+- 0.007 usec) Average memory usage: 11895 KB (+- 10 KB) $ $ perf stat -r5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.380 msec (+- 0.056 msec) Average time per event: 1.410 usec (+- 0.006 usec) Average memory usage: 12608 KB (+- 11 KB) Average build-id-all injection took: 11.889 msec (+- 0.064 msec) Average time per event: 1.166 usec (+- 0.006 usec) Average memory usage: 11838 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.246 msec (+- 0.065 msec) Average time per event: 1.397 usec (+- 0.006 usec) Average memory usage: 12744 KB (+- 10 KB) Average build-id-all injection took: 12.019 msec (+- 0.066 msec) Average time per event: 1.178 usec (+- 0.006 usec) Average memory usage: 11963 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.321 msec (+- 0.067 msec) Average time per event: 1.404 usec (+- 0.007 usec) Average memory usage: 12690 KB (+- 10 KB) Average build-id-all injection took: 11.909 msec (+- 0.041 msec) Average time per event: 1.168 usec (+- 0.004 usec) Average memory usage: 11938 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.287 msec (+- 0.059 msec) Average time per event: 1.401 usec (+- 0.006 usec) Average memory usage: 12864 KB (+- 10 KB) Average build-id-all injection took: 11.862 msec (+- 0.058 msec) Average time per event: 1.163 usec (+- 0.006 usec) Average memory usage: 12103 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.402 msec (+- 0.053 msec) Average time per event: 1.412 usec (+- 0.005 usec) Average memory usage: 12876 KB (+- 10 KB) Average build-id-all injection took: 11.826 msec (+- 0.061 msec) Average time per event: 1.159 usec (+- 0.006 usec) Average memory usage: 12111 KB (+- 10 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 4,267.48 msec task-clock:u # 1.502 CPUs utilized ( +- 0.14% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 102,092 page-faults:u # 0.024 M/sec ( +- 0.08% ) 3,894,589,578 cycles:u # 0.913 GHz ( +- 0.19% ) (83.49%) 140,078,421 stalled-cycles-frontend:u # 3.60% frontend cycles idle ( +- 0.77% ) (83.34%) 948,581,189 stalled-cycles-backend:u # 24.36% backend cycles idle ( +- 0.46% ) (83.25%) 5,835,587,719 instructions:u # 1.50 insn per cycle # 0.16 stalled cycles per insn ( +- 0.21% ) (83.24%) 1,267,423,636 branches:u # 296.996 M/sec ( +- 0.22% ) (83.12%) 17,484,290 branch-misses:u # 1.38% of all branches ( +- 0.12% ) (83.55%) 2.84176 +- 0.00222 seconds time elapsed ( +- 0.08% ) $ Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20201012070214.2074921-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/Build | 1 + tools/perf/bench/bench.h | 1 + tools/perf/bench/inject-buildid.c | 460 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 462 insertions(+) create mode 100644 tools/perf/bench/inject-buildid.c (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index dd68a40a790c..8b52591338d6 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -12,6 +12,7 @@ perf-y += epoll-ctl.o perf-y += synthesize.o perf-y += kallsyms-parse.o perf-y += find-bit-bench.o +perf-y += inject-buildid.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 2804812d4154..eac36afab2b3 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -47,6 +47,7 @@ int bench_epoll_wait(int argc, const char **argv); int bench_epoll_ctl(int argc, const char **argv); int bench_synthesize(int argc, const char **argv); int bench_kallsyms_parse(int argc, const char **argv); +int bench_inject_build_id(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c new file mode 100644 index 000000000000..0fccf2a9e95b --- /dev/null +++ b/tools/perf/bench/inject-buildid.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bench.h" +#include "util/data.h" +#include "util/stat.h" +#include "util/debug.h" +#include "util/event.h" +#include "util/symbol.h" +#include "util/session.h" +#include "util/build-id.h" +#include "util/synthetic-events.h" + +#define MMAP_DEV_MAJOR 8 +#define DSO_MMAP_RATIO 4 + +static unsigned int iterations = 100; +static unsigned int nr_mmaps = 100; +static unsigned int nr_samples = 100; /* samples per mmap */ + +static u64 bench_sample_type; +static u16 bench_id_hdr_size; + +struct bench_data { + int pid; + int input_pipe[2]; + int output_pipe[2]; + pthread_t th; +}; + +struct bench_dso { + struct list_head list; + char *name; + int ino; +}; + +static int nr_dsos; +static struct bench_dso *dsos; + +extern int cmd_inject(int argc, const char *argv[]); + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average (default: 100)"), + OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps, + "Number of mmap events for each iteration (default: 100)"), + OPT_UINTEGER('n', "nr-samples", &nr_samples, + "Number of sample events per mmap event (default: 100)"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show iteration count, DSO name, etc)"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals inject-build-id ", + NULL +}; + +/* + * Helper for collect_dso that adds the given file as a dso to dso_list + * if it contains a build-id. Stops after collecting 4 times more than + * we need (for MMAP2 events). + */ +static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, + int typeflag, struct FTW *ftwbuf __maybe_unused) +{ + struct bench_dso *dso = &dsos[nr_dsos]; + unsigned char build_id[BUILD_ID_SIZE]; + + if (typeflag == FTW_D || typeflag == FTW_SL) + return 0; + + if (filename__read_build_id(fpath, build_id, BUILD_ID_SIZE) < 0) + return 0; + + dso->name = realpath(fpath, NULL); + if (dso->name == NULL) + return -1; + + dso->ino = nr_dsos++; + pr_debug2(" Adding DSO: %s\n", fpath); + + /* stop if we collected enough DSOs */ + if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps) + return 1; + + return 0; +} + +static void collect_dso(void) +{ + dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos)); + if (dsos == NULL) { + printf(" Memory allocation failed\n"); + exit(1); + } + + if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0) + return; + + pr_debug(" Collected %d DSOs\n", nr_dsos); +} + +static void release_dso(void) +{ + int i; + + for (i = 0; i < nr_dsos; i++) { + struct bench_dso *dso = &dsos[i]; + + free(dso->name); + } + free(dsos); +} + +/* Fake address used by mmap and sample events */ +static u64 dso_map_addr(struct bench_dso *dso) +{ + return 0x400000ULL + dso->ino * 8192ULL; +} + +static u32 synthesize_attr(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.attr) + sizeof(u64)); + + event.header.type = PERF_RECORD_HEADER_ATTR; + event.header.size = sizeof(event.attr) + sizeof(u64); + + event.attr.attr.type = PERF_TYPE_SOFTWARE; + event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK; + event.attr.attr.exclude_kernel = 1; + event.attr.attr.sample_id_all = 1; + event.attr.attr.sample_type = bench_sample_type; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_fork(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size); + + event.header.type = PERF_RECORD_FORK; + event.header.misc = PERF_RECORD_MISC_FORK_EXEC; + event.header.size = sizeof(event.fork) + bench_id_hdr_size; + + event.fork.ppid = 1; + event.fork.ptid = 1; + event.fork.pid = data->pid; + event.fork.tid = data->pid; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + size_t len = offsetof(struct perf_record_mmap2, filename); + u64 *id_hdr_ptr = (void *)&event; + int ts_idx; + + len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size; + + memset(&event, 0, min(len, sizeof(event.mmap2))); + + event.header.type = PERF_RECORD_MMAP2; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = len; + + event.mmap2.pid = data->pid; + event.mmap2.tid = data->pid; + event.mmap2.maj = MMAP_DEV_MAJOR; + event.mmap2.ino = dso->ino; + + strcpy(event.mmap2.filename, dso->name); + + event.mmap2.start = dso_map_addr(dso); + event.mmap2.len = 4096; + event.mmap2.prot = PROT_EXEC; + + if (len > sizeof(event.mmap2)) { + /* write mmap2 event first */ + writen(data->input_pipe[1], &event, len - bench_id_hdr_size); + /* zero-fill sample id header */ + memset(id_hdr_ptr, 0, bench_id_hdr_size); + /* put timestamp in the right position */ + ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size); + } else { + ts_idx = (len / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], &event, len); + } + return len; +} + +static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + struct perf_sample sample = { + .tid = data->pid, + .pid = data->pid, + .ip = dso_map_addr(dso), + .time = timestamp, + }; + + event.header.type = PERF_RECORD_SAMPLE; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0); + + perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample); + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_flush(struct bench_data *data) +{ + struct perf_event_header header = { + .size = sizeof(header), + .type = PERF_RECORD_FINISHED_ROUND, + }; + + return writen(data->input_pipe[1], &header, header.size); +} + +static void *data_reader(void *arg) +{ + struct bench_data *data = arg; + char buf[8192]; + int flag; + int n; + + flag = fcntl(data->output_pipe[0], F_GETFL); + fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK); + + /* read out data from child */ + while (true) { + n = read(data->output_pipe[0], buf, sizeof(buf)); + if (n > 0) + continue; + if (n == 0) + break; + + if (errno != EINTR && errno != EAGAIN) + break; + + usleep(100); + } + + close(data->output_pipe[0]); + return NULL; +} + +static int setup_injection(struct bench_data *data) +{ + int ready_pipe[2]; + int dev_null_fd; + char buf; + + if (pipe(ready_pipe) < 0) + return -1; + + if (pipe(data->input_pipe) < 0) + return -1; + + if (pipe(data->output_pipe) < 0) + return -1; + + data->pid = fork(); + if (data->pid < 0) + return -1; + + if (data->pid == 0) { + const char **inject_argv; + + close(data->input_pipe[1]); + close(data->output_pipe[0]); + close(ready_pipe[0]); + + dup2(data->input_pipe[0], STDIN_FILENO); + close(data->input_pipe[0]); + dup2(data->output_pipe[1], STDOUT_FILENO); + close(data->output_pipe[1]); + + dev_null_fd = open("/dev/null", O_WRONLY); + if (dev_null_fd < 0) + exit(1); + + dup2(dev_null_fd, STDERR_FILENO); + + inject_argv = calloc(3, sizeof(*inject_argv)); + if (inject_argv == NULL) + exit(1); + + inject_argv[0] = strdup("inject"); + inject_argv[1] = strdup("-b"); + + /* signal that we're ready to go */ + close(ready_pipe[1]); + + cmd_inject(2, inject_argv); + + exit(0); + } + + pthread_create(&data->th, NULL, data_reader, data); + + close(ready_pipe[1]); + close(data->input_pipe[0]); + close(data->output_pipe[1]); + + /* wait for child ready */ + if (read(ready_pipe[0], &buf, 1) < 0) + return -1; + close(ready_pipe[0]); + + return 0; +} + +static int inject_build_id(struct bench_data *data, u64 *max_rss) +{ + int status; + unsigned int i, k; + struct rusage rusage; + u64 len = 0; + + /* this makes the child to run */ + if (perf_header__write_pipe(data->input_pipe[1]) < 0) + return -1; + + len += synthesize_attr(data); + len += synthesize_fork(data); + + for (i = 0; i < nr_mmaps; i++) { + int idx = rand() % (nr_dsos - 1); + struct bench_dso *dso = &dsos[idx]; + u64 timestamp = rand() % 1000000; + + pr_debug2(" [%d] injecting: %s\n", i+1, dso->name); + len += synthesize_mmap(data, dso, timestamp); + + for (k = 0; k < nr_samples; k++) + len += synthesize_sample(data, dso, timestamp + k * 1000); + + if ((i + 1) % 10 == 0) + len += synthesize_flush(data); + } + + /* tihs makes the child to finish */ + close(data->input_pipe[1]); + + wait4(data->pid, &status, 0, &rusage); + *max_rss = rusage.ru_maxrss; + + pr_debug(" Child %d exited with %d\n", data->pid, status); + + return 0; +} + +static int do_inject_loop(struct bench_data *data) +{ + unsigned int i; + struct stats time_stats, mem_stats; + double time_average, time_stddev; + double mem_average, mem_stddev; + + srand(time(NULL)); + init_stats(&time_stats); + init_stats(&mem_stats); + symbol__init(NULL); + + bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; + bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; + bench_id_hdr_size = 32; + + collect_dso(); + if (nr_dsos == 0) { + printf(" Cannot collect DSOs for injection\n"); + return -1; + } + + for (i = 0; i < iterations; i++) { + struct timeval start, end, diff; + u64 runtime_us, max_rss; + + pr_debug(" Iteration #%d\n", i+1); + + if (setup_injection(data) < 0) { + printf(" Build-id injection setup failed\n"); + break; + } + + gettimeofday(&start, NULL); + if (inject_build_id(data, &max_rss) < 0) { + printf(" Build-id injection failed\n"); + break; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&mem_stats, max_rss); + + pthread_join(data->th, NULL); + } + + time_average = avg_stats(&time_stats) / USEC_PER_MSEC; + time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; + printf(" Average build-id injection took: %.3f msec (+- %.3f msec)\n", + time_average, time_stddev); + + /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */ + time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + printf(" Average time per event: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + mem_average = avg_stats(&mem_stats); + mem_stddev = stddev_stats(&mem_stats); + printf(" Average memory usage: %.0f KB (+- %.0f KB)\n", + mem_average, mem_stddev); + + release_dso(); + return 0; +} + +int bench_inject_build_id(int argc, const char **argv) +{ + struct bench_data data; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + return do_inject_loop(&data); +} + -- cgit v1.2.3-70-g09d2 From bf7ef5ddb0b327099404d3e2af4b3b142c702813 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Oct 2020 16:02:14 +0900 Subject: perf bench: Run inject-build-id with --buildid-all option too For comparison, it now runs the benchmark twice - one if regular -b and another for --buildid-all. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 21.002 msec (+- 0.172 msec) Average time per event: 2.059 usec (+- 0.017 usec) Average memory usage: 8169 KB (+- 0 KB) Average build-id-all injection took: 19.543 msec (+- 0.124 msec) Average time per event: 1.916 usec (+- 0.012 usec) Average memory usage: 7348 KB (+- 0 KB) Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201012070214.2074921-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/inject-buildid.c | 54 +++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 19 deletions(-) (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 0fccf2a9e95b..e9a11f4a1109 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -271,7 +271,7 @@ static void *data_reader(void *arg) return NULL; } -static int setup_injection(struct bench_data *data) +static int setup_injection(struct bench_data *data, bool build_id_all) { int ready_pipe[2]; int dev_null_fd; @@ -292,6 +292,7 @@ static int setup_injection(struct bench_data *data) if (data->pid == 0) { const char **inject_argv; + int inject_argc = 2; close(data->input_pipe[1]); close(data->output_pipe[0]); @@ -308,17 +309,22 @@ static int setup_injection(struct bench_data *data) dup2(dev_null_fd, STDERR_FILENO); - inject_argv = calloc(3, sizeof(*inject_argv)); + if (build_id_all) + inject_argc++; + + inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv)); if (inject_argv == NULL) exit(1); inject_argv[0] = strdup("inject"); inject_argv[1] = strdup("-b"); + if (build_id_all) + inject_argv[2] = strdup("--buildid-all"); /* signal that we're ready to go */ close(ready_pipe[1]); - cmd_inject(2, inject_argv); + cmd_inject(inject_argc, inject_argv); exit(0); } @@ -377,27 +383,17 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) return 0; } -static int do_inject_loop(struct bench_data *data) +static void do_inject_loop(struct bench_data *data, bool build_id_all) { unsigned int i; struct stats time_stats, mem_stats; double time_average, time_stddev; double mem_average, mem_stddev; - srand(time(NULL)); init_stats(&time_stats); init_stats(&mem_stats); - symbol__init(NULL); - bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; - bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; - bench_id_hdr_size = 32; - - collect_dso(); - if (nr_dsos == 0) { - printf(" Cannot collect DSOs for injection\n"); - return -1; - } + pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : ""); for (i = 0; i < iterations; i++) { struct timeval start, end, diff; @@ -405,7 +401,7 @@ static int do_inject_loop(struct bench_data *data) pr_debug(" Iteration #%d\n", i+1); - if (setup_injection(data) < 0) { + if (setup_injection(data, build_id_all) < 0) { printf(" Build-id injection setup failed\n"); break; } @@ -427,8 +423,8 @@ static int do_inject_loop(struct bench_data *data) time_average = avg_stats(&time_stats) / USEC_PER_MSEC; time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; - printf(" Average build-id injection took: %.3f msec (+- %.3f msec)\n", - time_average, time_stddev); + printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n", + build_id_all ? "-all" : "", time_average, time_stddev); /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */ time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); @@ -440,6 +436,26 @@ static int do_inject_loop(struct bench_data *data) mem_stddev = stddev_stats(&mem_stats); printf(" Average memory usage: %.0f KB (+- %.0f KB)\n", mem_average, mem_stddev); +} + +static int do_inject_loops(struct bench_data *data) +{ + + srand(time(NULL)); + symbol__init(NULL); + + bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; + bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; + bench_id_hdr_size = 32; + + collect_dso(); + if (nr_dsos == 0) { + printf(" Cannot collect DSOs for injection\n"); + return -1; + } + + do_inject_loop(data, false); + do_inject_loop(data, true); release_dso(); return 0; @@ -455,6 +471,6 @@ int bench_inject_build_id(int argc, const char **argv) exit(EXIT_FAILURE); } - return do_inject_loop(&data); + return do_inject_loops(&data); } -- cgit v1.2.3-70-g09d2 From 0aba7f036a56675b7fdc536757b4c49fc76a2208 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:33 +0200 Subject: perf tools: Use build_id object in dso Replace build_id byte array with struct build_id object and all the code that references it. The objective is to carry size together with build id array, so it's better to keep both together. This is preparatory change for following patches, and there's no functional change. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/inject-buildid.c | 4 ++-- tools/perf/builtin-buildid-cache.c | 2 +- tools/perf/builtin-inject.c | 4 ++-- tools/perf/util/annotate.c | 4 ++-- tools/perf/util/build-id.c | 6 +++--- tools/perf/util/build-id.h | 5 +++++ tools/perf/util/dso.c | 18 +++++++++--------- tools/perf/util/dso.h | 2 +- tools/perf/util/dsos.c | 4 ++-- tools/perf/util/header.c | 2 +- tools/perf/util/map.c | 4 ++-- tools/perf/util/probe-event.c | 2 +- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/symbol.c | 2 +- tools/perf/util/synthetic-events.c | 2 +- 15 files changed, 34 insertions(+), 29 deletions(-) (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index e9a11f4a1109..3d048a8139a7 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -79,12 +79,12 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, int typeflag, struct FTW *ftwbuf __maybe_unused) { struct bench_dso *dso = &dsos[nr_dsos]; - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; if (typeflag == FTW_D || typeflag == FTW_SL) return 0; - if (filename__read_build_id(fpath, build_id, BUILD_ID_SIZE) < 0) + if (filename__read_build_id(fpath, bid.data, sizeof(bid.data)) < 0) return 0; dso->name = realpath(fpath, NULL); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 39efa51d7fb3..a523c629f321 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -284,7 +284,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) pr_warning("Problems with %s file, consider removing it from the cache\n", filename); - } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { + } else if (memcmp(dso->bid.data, build_id, sizeof(dso->bid.data))) { pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f3f965157d69..a35cdabe5bd4 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -522,8 +522,8 @@ static int dso__read_build_id(struct dso *dso) return 0; nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (filename__read_build_id(dso->long_name, dso->build_id, - sizeof(dso->build_id)) > 0) { + if (filename__read_build_id(dso->long_name, dso->bid.data, + sizeof(dso->bid.data)) > 0) { dso->has_build_id = true; } nsinfo__mountns_exit(&nsc); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index fc17af7ba845..a016e1bd7b8d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1578,8 +1578,8 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s char *build_id_msg = NULL; if (dso->has_build_id) { - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), bf + 15); + build_id__sprintf(dso->bid.data, + sizeof(dso->bid.data), bf + 15); build_id_msg = bf; } scnprintf(buf, buflen, diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 31207b6e2066..7da13ddb0d50 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -272,7 +272,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, if (!dso->has_build_id) return NULL; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -355,7 +355,7 @@ static int machine__write_buildid_table(struct machine *machine, in_kernel = pos->kernel || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); - err = write_buildid(name, name_len, pos->build_id, machine->pid, + err = write_buildid(name, name_len, pos->bid.data, machine->pid, in_kernel ? kmisc : umisc, fd); if (err) break; @@ -841,7 +841,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) is_kallsyms = true; name = machine->mmap_name; } - return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, + return build_id_cache__add_b(dso->bid.data, sizeof(dso->bid.data), name, dso->nsinfo, is_kallsyms, is_vdso); } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 949f7e54c9cb..5033e96d5c9d 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -8,6 +8,11 @@ #include "tool.h" #include +struct build_id { + u8 data[BUILD_ID_SIZE]; + size_t size; +}; + struct nsinfo; extern struct perf_tool build_id__mark_dso_hit_ops; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5a3b4755f0b3..d2c1ed08c879 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -172,8 +172,8 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), + build_id__sprintf(dso->bid.data, + sizeof(dso->bid.data), build_id_hex); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", @@ -1330,13 +1330,13 @@ void dso__put(struct dso *dso) void dso__set_build_id(struct dso *dso, void *build_id) { - memcpy(dso->build_id, build_id, sizeof(dso->build_id)); + memcpy(dso->bid.data, build_id, sizeof(dso->bid.data)); dso->has_build_id = 1; } bool dso__build_id_equal(const struct dso *dso, u8 *build_id) { - return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0; + return memcmp(dso->bid.data, build_id, sizeof(dso->bid.data)) == 0; } void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) @@ -1346,8 +1346,8 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) if (machine__is_default_guest(machine)) return; sprintf(path, "%s/sys/kernel/notes", machine->root_dir); - if (sysfs__read_build_id(path, dso->build_id, - sizeof(dso->build_id)) == 0) + if (sysfs__read_build_id(path, dso->bid.data, + sizeof(dso->bid.data)) == 0) dso->has_build_id = true; } @@ -1365,8 +1365,8 @@ int dso__kernel_module_get_build_id(struct dso *dso, "%s/sys/module/%.*s/notes/.note.gnu.build-id", root_dir, (int)strlen(name) - 1, name); - if (sysfs__read_build_id(filename, dso->build_id, - sizeof(dso->build_id)) == 0) + if (sysfs__read_build_id(filename, dso->bid.data, + sizeof(dso->bid.data)) == 0) dso->has_build_id = true; return 0; @@ -1376,7 +1376,7 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); return fprintf(fp, "%s", sbuild_id); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 8ad17f395a19..eac004210b47 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -176,7 +176,7 @@ struct dso { bool sorted_by_name; bool loaded; u8 rel; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; u64 text_offset; const char *short_name; const char *long_name; diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 939471731ea6..e3af46e818f1 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -73,8 +73,8 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) continue; } nsinfo__mountns_enter(pos->nsinfo, &nsc); - if (filename__read_build_id(pos->long_name, pos->build_id, - sizeof(pos->build_id)) > 0) { + if (filename__read_build_id(pos->long_name, pos->bid.data, + sizeof(pos->bid.data)) > 0) { have_build_id = true; pos->has_build_id = true; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9cf4efdcbbbd..cde8f6eba479 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2095,7 +2095,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, free(m.name); } - build_id__sprintf(dso->build_id, sizeof(dso->build_id), + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); pr_debug("build id event received for %s: %s\n", dso->long_name, sbuild_id); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8b305e624124..522df3090b1c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -331,8 +331,8 @@ int map__load(struct map *map) if (map->dso->has_build_id) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(map->dso->build_id, - sizeof(map->dso->build_id), + build_id__sprintf(map->dso->bid.data, + sizeof(map->dso->bid.data), sbuild_id); pr_debug("%s with build id %s not found", name, sbuild_id); } else diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 3a1b58a92673..2041ad849851 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -473,7 +473,7 @@ static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *ns if (!c) return NULL; - build_id__sprintf(dso->build_id, BUILD_ID_SIZE, sbuild_id); + build_id__sprintf(dso->bid.data, BUILD_ID_SIZE, sbuild_id); fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, 0, &path); if (fd >= 0) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 739516fdf6e3..db3b1c275d8f 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1064,7 +1064,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); t = tuple_new(5); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5ddf76fb691c..c772c8c70db5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2153,7 +2153,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(dso->bid.data, sizeof(dso->bid.data), sbuild_id); /* Find kallsyms in build-id cache with kcore */ scnprintf(path, sizeof(path), "%s/%s/%s", diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3ca5d9399680..8a23391558cf 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1961,7 +1961,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 len = pos->long_name_len + 1; len = PERF_ALIGN(len, NAME_ALIGN); - memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); + memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data)); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc; ev.build_id.pid = machine->pid; -- cgit v1.2.3-70-g09d2 From f766819cd5290d42efa55c505b9bed184fec17bf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2020 21:24:34 +0200 Subject: perf tools: Pass build_id object to filename__read_build_id() Pass a build_id object to filename__read_build_id function, so it can populate the size of the build_id object. Changing filename__read_build_id() code for both ELF/non-ELF code. Signed-off-by: Jiri Olsa Acked-by: Ian Rogers Link: https://lore.kernel.org/r/20201013192441.1299447-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/builtin-buildid-cache.c | 25 ++++++++++++------------- tools/perf/builtin-inject.c | 4 +--- tools/perf/tests/pe-file-parsing.c | 10 +++++----- tools/perf/tests/sdt.c | 6 +++--- tools/perf/util/build-id.c | 8 +++----- tools/perf/util/dsos.c | 3 +-- tools/perf/util/symbol-elf.c | 16 ++++++++++------ tools/perf/util/symbol-minimal.c | 34 +++++++++++++++++++--------------- tools/perf/util/symbol.c | 6 +++--- tools/perf/util/symbol.h | 3 ++- 11 files changed, 60 insertions(+), 57 deletions(-) (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 3d048a8139a7..280227e3ffd7 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -84,7 +84,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, if (typeflag == FTW_D || typeflag == FTW_SL) return 0; - if (filename__read_build_id(fpath, bid.data, sizeof(bid.data)) < 0) + if (filename__read_build_id(fpath, &bid) < 0) return 0; dso->name = realpath(fpath, NULL); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index a523c629f321..c3cb168d546d 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -174,19 +174,19 @@ static int build_id_cache__add_kcore(const char *filename, bool force) static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) { char sbuild_id[SBUILD_ID_SIZE]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int err; struct nscookie nsc; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, nsi, false, false); pr_debug("Adding %s %s: %s\n", sbuild_id, filename, @@ -196,21 +196,21 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) { - u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); err = build_id_cache__remove_s(sbuild_id); pr_debug("Removing %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -274,17 +274,16 @@ static int build_id_cache__purge_all(void) static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) { char filename[PATH_MAX]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; if (dso__build_id_filename(dso, filename, sizeof(filename), false) && - filename__read_build_id(filename, build_id, - sizeof(build_id)) != sizeof(build_id)) { + filename__read_build_id(filename, &bid) == -1) { if (errno == ENOENT) return false; pr_warning("Problems with %s file, consider removing it from the cache\n", filename); - } else if (memcmp(dso->bid.data, build_id, sizeof(dso->bid.data))) { + } else if (memcmp(dso->bid.data, bid.data, bid.size)) { pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } @@ -300,14 +299,14 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) { - u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -315,7 +314,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) } err = 0; - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); if (build_id_cache__cached(sbuild_id)) err = build_id_cache__remove_s(sbuild_id); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a35cdabe5bd4..452a75fe68e5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -522,10 +522,8 @@ static int dso__read_build_id(struct dso *dso) return 0; nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (filename__read_build_id(dso->long_name, dso->bid.data, - sizeof(dso->bid.data)) > 0) { + if (filename__read_build_id(dso->long_name, &dso->bid) > 0) dso->has_build_id = true; - } nsinfo__mountns_exit(&nsc); return dso->has_build_id ? 0 : -1; diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c index 19eae3e8e229..58b90c42eb38 100644 --- a/tools/perf/tests/pe-file-parsing.c +++ b/tools/perf/tests/pe-file-parsing.c @@ -24,7 +24,7 @@ static int run_dir(const char *d) { char filename[PATH_MAX]; char debugfile[PATH_MAX]; - char build_id[BUILD_ID_SIZE]; + struct build_id bid; char debuglink[PATH_MAX]; char expect_build_id[] = { 0x5a, 0x0f, 0xd8, 0x82, 0xb5, 0x30, 0x84, 0x22, @@ -36,10 +36,10 @@ static int run_dir(const char *d) int ret; scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d); - ret = filename__read_build_id(filename, build_id, BUILD_ID_SIZE); + ret = filename__read_build_id(filename, &bid); TEST_ASSERT_VAL("Failed to read build_id", ret == sizeof(expect_build_id)); - TEST_ASSERT_VAL("Wrong build_id", !memcmp(build_id, expect_build_id, + TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, sizeof(expect_build_id))); ret = filename__read_debuglink(filename, debuglink, PATH_MAX); @@ -48,10 +48,10 @@ static int run_dir(const char *d) !strcmp(debuglink, expect_debuglink)); scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink); - ret = filename__read_build_id(debugfile, build_id, BUILD_ID_SIZE); + ret = filename__read_build_id(debugfile, &bid); TEST_ASSERT_VAL("Failed to read debug file build_id", ret == sizeof(expect_build_id)); - TEST_ASSERT_VAL("Wrong build_id", !memcmp(build_id, expect_build_id, + TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, sizeof(expect_build_id))); dso = dso__new(filename); diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 60f0e9ee04fb..3ef37f739203 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -28,16 +28,16 @@ static int target_function(void) static int build_id_cache__add_file(const char *filename) { char sbuild_id[SBUILD_ID_SIZE]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int err; - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); if (err < 0) { pr_debug("Failed to read build id of %s\n", filename); return err; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false); if (err < 0) pr_debug("Failed to add build id cache of %s\n", filename); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7da13ddb0d50..62b258aaa128 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -130,16 +130,14 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) int filename__sprintf_build_id(const char *pathname, char *sbuild_id) { - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; - ret = filename__read_build_id(pathname, build_id, sizeof(build_id)); + ret = filename__read_build_id(pathname, &bid); if (ret < 0) return ret; - else if (ret != sizeof(build_id)) - return -EINVAL; - return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + return build_id__sprintf(bid.data, sizeof(bid.data), sbuild_id); } /* asnprintf consolidates asprintf and snprintf */ diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index e3af46e818f1..87161e431830 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -73,8 +73,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) continue; } nsinfo__mountns_enter(pos->nsinfo, &nsc); - if (filename__read_build_id(pos->long_name, pos->bid.data, - sizeof(pos->bid.data)) > 0) { + if (filename__read_build_id(pos->long_name, &pos->bid) > 0) { have_build_id = true; pos->has_build_id = true; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 94a156df22d5..61d7c444e6f5 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -534,8 +534,9 @@ out: #ifdef HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, void *bf, size_t size) +int filename__read_build_id(const char *filename, struct build_id *bid) { + size_t size = sizeof(bid->data); int err = -1; bfd *abfd; @@ -551,9 +552,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (!abfd->build_id || abfd->build_id->size > size) goto out_close; - memcpy(bf, abfd->build_id->data, abfd->build_id->size); - memset(bf + abfd->build_id->size, 0, size - abfd->build_id->size); - err = abfd->build_id->size; + memcpy(bid->data, abfd->build_id->data, abfd->build_id->size); + memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size); + err = bid->size = abfd->build_id->size; out_close: bfd_close(abfd); @@ -562,8 +563,9 @@ out_close: #else // HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, void *bf, size_t size) +int filename__read_build_id(const char *filename, struct build_id *bid) { + size_t size = sizeof(bid->data); int fd, err = -1; Elf *elf; @@ -580,7 +582,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) goto out_close; } - err = elf_read_build_id(elf, bf, size); + err = elf_read_build_id(elf, bid->data, size); + if (err > 0) + bid->size = err; elf_end(elf); out_close: diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index d6e99af263ec..5173331ee6e4 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -31,9 +31,10 @@ static bool check_need_swap(int file_endian) #define NT_GNU_BUILD_ID 3 -static int read_build_id(void *note_data, size_t note_len, void *bf, - size_t size, bool need_swap) +static int read_build_id(void *note_data, size_t note_len, struct build_id *bid, + bool need_swap) { + size_t size = sizeof(bid->data); struct { u32 n_namesz; u32 n_descsz; @@ -63,8 +64,9 @@ static int read_build_id(void *note_data, size_t note_len, void *bf, nhdr->n_namesz == sizeof("GNU")) { if (memcmp(name, "GNU", sizeof("GNU")) == 0) { size_t sz = min(size, descsz); - memcpy(bf, ptr, sz); - memset(bf + sz, 0, size - sz); + memcpy(bid->data, ptr, sz); + memset(bid->data + sz, 0, size - sz); + bid->size = sz; return 0; } } @@ -84,7 +86,7 @@ int filename__read_debuglink(const char *filename __maybe_unused, /* * Just try PT_NOTE header otherwise fails */ -int filename__read_build_id(const char *filename, void *bf, size_t size) +int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; @@ -156,9 +158,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (fread(buf, buf_size, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bf, size, need_swap); + ret = read_build_id(buf, buf_size, bid, need_swap); if (ret == 0) - ret = size; + ret = bid->size; break; } } else { @@ -207,9 +209,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (fread(buf, buf_size, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bf, size, need_swap); + ret = read_build_id(buf, buf_size, bid, need_swap); if (ret == 0) - ret = size; + ret = bid->size; break; } } @@ -220,8 +222,9 @@ out: return ret; } -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +int sysfs__read_build_id(const char *filename, void *build_id, size_t size __maybe_unused) { + struct build_id bid; int fd; int ret = -1; struct stat stbuf; @@ -243,7 +246,9 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) if (read(fd, buf, buf_size) != (ssize_t) buf_size) goto out_free; - ret = read_build_id(buf, buf_size, build_id, size, false); + ret = read_build_id(buf, buf_size, &bid, false); + if (ret > 0) + memcpy(build_id, bid.data, bid.size); out_free: free(buf); out: @@ -339,16 +344,15 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, struct symsrc *runtime_ss __maybe_unused, int kmodule __maybe_unused) { - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; ret = fd__is_64_bit(ss->fd); if (ret >= 0) dso->is_64_bit = ret; - if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { - dso__set_build_id(dso, build_id); - } + if (filename__read_build_id(ss->name, &bid) > 0) + dso__set_build_id(dso, bid.data); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c772c8c70db5..4c43bb959fee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1755,7 +1755,7 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; bool perfmap; - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; char newmapname[PATH_MAX]; const char *map_path = dso->long_name; @@ -1817,8 +1817,8 @@ int dso__load(struct dso *dso, struct map *map) if (!dso->has_build_id && is_regular_file(dso->long_name)) { __symbol__join_symfs(name, PATH_MAX, dso->long_name); - if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0) - dso__set_build_id(dso, build_id); + if (filename__read_build_id(name, &bid) > 0) + dso__set_build_id(dso, bid.data); } /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 11fe71f46d14..98908fa3f796 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -23,6 +23,7 @@ struct dso; struct map; struct maps; struct option; +struct build_id; /* * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; @@ -142,7 +143,7 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -int filename__read_build_id(const char *filename, void *bf, size_t size); +int filename__read_build_id(const char *filename, struct build_id *id); int sysfs__read_build_id(const char *filename, void *bf, size_t size); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, -- cgit v1.2.3-70-g09d2 From f92993851f01fa5d8892329d30867acc87180d39 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 12 Oct 2020 09:16:11 -0700 Subject: perf bench: Use condition variables in numa. The existing approach to synchronization between threads in the numa benchmark is unbalanced mutexes. This synchronization causes thread sanitizer to warn of locks being taken twice on a thread without an unlock, as well as unlocks with no corresponding locks. This change replaces the synchronization with more regular condition variables. While this fixes one class of thread sanitizer warnings, there still remain warnings of data races due to threads reading and writing shared memory without any atomics. Committer testing: Basic run on a non-NUMA machine. # perf bench numa # List of available benchmarks for collection 'numa': mem: Benchmark for NUMA workloads all: Run all NUMA benchmarks # perf bench numa all # Running numa/mem benchmark... # Running main, "perf bench numa numa-mem" # # Running test on: Linux five 5.8.12-200.fc32.x86_64 #1 SMP Mon Sep 28 12:17:31 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux # # Running RAM-bw-local, "perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 0 -s 20 -zZq --thp 1 --no-data_rand_walk" 20.076 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.073 secs average thread-runtime 0.190 % difference between max/avg runtime 241.828 GB data processed, per thread 241.828 GB data processed, total 0.083 nsecs/byte/thread runtime 12.045 GB/sec/thread speed 12.045 GB/sec total speed # Running RAM-bw-local-NOTHP, "perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 0 -s 20 -zZq --thp 1 --no-data_rand_walk --thp -1" 20.045 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.014 secs average thread-runtime 0.111 % difference between max/avg runtime 234.304 GB data processed, per thread 234.304 GB data processed, total 0.086 nsecs/byte/thread runtime 11.689 GB/sec/thread speed 11.689 GB/sec total speed # Running RAM-bw-remote, "perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 1 -s 20 -zZq --thp 1 --no-data_rand_walk" Test not applicable, system has only 1 nodes. # Running RAM-bw-local-2x, "perf bench numa mem -p 2 -t 1 -P 1024 -C 0,2 -M 0x2 -s 20 -zZq --thp 1 --no-data_rand_walk" 20.138 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.121 secs average thread-runtime 0.342 % difference between max/avg runtime 135.961 GB data processed, per thread 271.922 GB data processed, total 0.148 nsecs/byte/thread runtime 6.752 GB/sec/thread speed 13.503 GB/sec total speed # Running RAM-bw-remote-2x, "perf bench numa mem -p 2 -t 1 -P 1024 -C 0,2 -M 1x2 -s 20 -zZq --thp 1 --no-data_rand_walk" Test not applicable, system has only 1 nodes. # Running RAM-bw-cross, "perf bench numa mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk" Test not applicable, system has only 1 nodes. # Running 1x3-convergence, "perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp 1" 0.747 secs latency to NUMA-converge 0.747 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.714 secs average thread-runtime 50.000 % difference between max/avg runtime 3.228 GB data processed, per thread 9.683 GB data processed, total 0.231 nsecs/byte/thread runtime 4.321 GB/sec/thread speed 12.964 GB/sec total speed # Running 1x4-convergence, "perf bench numa mem -p 1 -t 4 -P 512 -s 100 -zZ0qcm --thp 1" 1.127 secs latency to NUMA-converge 1.127 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.089 secs average thread-runtime 5.624 % difference between max/avg runtime 3.765 GB data processed, per thread 15.062 GB data processed, total 0.299 nsecs/byte/thread runtime 3.342 GB/sec/thread speed 13.368 GB/sec total speed # Running 1x6-convergence, "perf bench numa mem -p 1 -t 6 -P 1020 -s 100 -zZ0qcm --thp 1" 1.003 secs latency to NUMA-converge 1.003 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.889 secs average thread-runtime 50.000 % difference between max/avg runtime 2.141 GB data processed, per thread 12.847 GB data processed, total 0.469 nsecs/byte/thread runtime 2.134 GB/sec/thread speed 12.805 GB/sec total speed # Running 2x3-convergence, "perf bench numa mem -p 2 -t 3 -P 1020 -s 100 -zZ0qcm --thp 1" 1.814 secs latency to NUMA-converge 1.814 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.716 secs average thread-runtime 22.440 % difference between max/avg runtime 3.747 GB data processed, per thread 22.483 GB data processed, total 0.484 nsecs/byte/thread runtime 2.065 GB/sec/thread speed 12.393 GB/sec total speed # Running 3x3-convergence, "perf bench numa mem -p 3 -t 3 -P 1020 -s 100 -zZ0qcm --thp 1" 2.065 secs latency to NUMA-converge 2.065 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.947 secs average thread-runtime 25.788 % difference between max/avg runtime 2.855 GB data processed, per thread 25.694 GB data processed, total 0.723 nsecs/byte/thread runtime 1.382 GB/sec/thread speed 12.442 GB/sec total speed # Running 4x4-convergence, "perf bench numa mem -p 4 -t 4 -P 512 -s 100 -zZ0qcm --thp 1" 1.912 secs latency to NUMA-converge 1.912 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.775 secs average thread-runtime 23.852 % difference between max/avg runtime 1.479 GB data processed, per thread 23.668 GB data processed, total 1.293 nsecs/byte/thread runtime 0.774 GB/sec/thread speed 12.378 GB/sec total speed # Running 4x4-convergence-NOTHP, "perf bench numa mem -p 4 -t 4 -P 512 -s 100 -zZ0qcm --thp 1 --thp -1" 1.783 secs latency to NUMA-converge 1.783 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.633 secs average thread-runtime 21.960 % difference between max/avg runtime 1.345 GB data processed, per thread 21.517 GB data processed, total 1.326 nsecs/byte/thread runtime 0.754 GB/sec/thread speed 12.067 GB/sec total speed # Running 4x6-convergence, "perf bench numa mem -p 4 -t 6 -P 1020 -s 100 -zZ0qcm --thp 1" 5.396 secs latency to NUMA-converge 5.396 secs slowest (max) thread-runtime 4.000 secs fastest (min) thread-runtime 4.928 secs average thread-runtime 12.937 % difference between max/avg runtime 2.721 GB data processed, per thread 65.306 GB data processed, total 1.983 nsecs/byte/thread runtime 0.504 GB/sec/thread speed 12.102 GB/sec total speed # Running 4x8-convergence, "perf bench numa mem -p 4 -t 8 -P 512 -s 100 -zZ0qcm --thp 1" 3.121 secs latency to NUMA-converge 3.121 secs slowest (max) thread-runtime 2.000 secs fastest (min) thread-runtime 2.836 secs average thread-runtime 17.962 % difference between max/avg runtime 1.194 GB data processed, per thread 38.192 GB data processed, total 2.615 nsecs/byte/thread runtime 0.382 GB/sec/thread speed 12.236 GB/sec total speed # Running 8x4-convergence, "perf bench numa mem -p 8 -t 4 -P 512 -s 100 -zZ0qcm --thp 1" 4.302 secs latency to NUMA-converge 4.302 secs slowest (max) thread-runtime 3.000 secs fastest (min) thread-runtime 4.045 secs average thread-runtime 15.133 % difference between max/avg runtime 1.631 GB data processed, per thread 52.178 GB data processed, total 2.638 nsecs/byte/thread runtime 0.379 GB/sec/thread speed 12.128 GB/sec total speed # Running 8x4-convergence-NOTHP, "perf bench numa mem -p 8 -t 4 -P 512 -s 100 -zZ0qcm --thp 1 --thp -1" 4.418 secs latency to NUMA-converge 4.418 secs slowest (max) thread-runtime 3.000 secs fastest (min) thread-runtime 4.104 secs average thread-runtime 16.045 % difference between max/avg runtime 1.664 GB data processed, per thread 53.254 GB data processed, total 2.655 nsecs/byte/thread runtime 0.377 GB/sec/thread speed 12.055 GB/sec total speed # Running 3x1-convergence, "perf bench numa mem -p 3 -t 1 -P 512 -s 100 -zZ0qcm --thp 1" 0.973 secs latency to NUMA-converge 0.973 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.955 secs average thread-runtime 50.000 % difference between max/avg runtime 4.124 GB data processed, per thread 12.372 GB data processed, total 0.236 nsecs/byte/thread runtime 4.238 GB/sec/thread speed 12.715 GB/sec total speed # Running 4x1-convergence, "perf bench numa mem -p 4 -t 1 -P 512 -s 100 -zZ0qcm --thp 1" 0.820 secs latency to NUMA-converge 0.820 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.808 secs average thread-runtime 50.000 % difference between max/avg runtime 2.555 GB data processed, per thread 10.220 GB data processed, total 0.321 nsecs/byte/thread runtime 3.117 GB/sec/thread speed 12.468 GB/sec total speed # Running 8x1-convergence, "perf bench numa mem -p 8 -t 1 -P 512 -s 100 -zZ0qcm --thp 1" 0.667 secs latency to NUMA-converge 0.667 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.607 secs average thread-runtime 50.000 % difference between max/avg runtime 1.009 GB data processed, per thread 8.069 GB data processed, total 0.661 nsecs/byte/thread runtime 1.512 GB/sec/thread speed 12.095 GB/sec total speed # Running 16x1-convergence, "perf bench numa mem -p 16 -t 1 -P 256 -s 100 -zZ0qcm --thp 1" 1.546 secs latency to NUMA-converge 1.546 secs slowest (max) thread-runtime 1.000 secs fastest (min) thread-runtime 1.485 secs average thread-runtime 17.664 % difference between max/avg runtime 1.162 GB data processed, per thread 18.594 GB data processed, total 1.331 nsecs/byte/thread runtime 0.752 GB/sec/thread speed 12.025 GB/sec total speed # Running 32x1-convergence, "perf bench numa mem -p 32 -t 1 -P 128 -s 100 -zZ0qcm --thp 1" 0.812 secs latency to NUMA-converge 0.812 secs slowest (max) thread-runtime 0.000 secs fastest (min) thread-runtime 0.739 secs average thread-runtime 50.000 % difference between max/avg runtime 0.309 GB data processed, per thread 9.874 GB data processed, total 2.630 nsecs/byte/thread runtime 0.380 GB/sec/thread speed 12.166 GB/sec total speed # Running 2x1-bw-process, "perf bench numa mem -p 2 -t 1 -P 1024 -s 20 -zZ0q --thp 1" 20.044 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.020 secs average thread-runtime 0.109 % difference between max/avg runtime 125.750 GB data processed, per thread 251.501 GB data processed, total 0.159 nsecs/byte/thread runtime 6.274 GB/sec/thread speed 12.548 GB/sec total speed # Running 3x1-bw-process, "perf bench numa mem -p 3 -t 1 -P 1024 -s 20 -zZ0q --thp 1" 20.148 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.090 secs average thread-runtime 0.367 % difference between max/avg runtime 85.267 GB data processed, per thread 255.800 GB data processed, total 0.236 nsecs/byte/thread runtime 4.232 GB/sec/thread speed 12.696 GB/sec total speed # Running 4x1-bw-process, "perf bench numa mem -p 4 -t 1 -P 1024 -s 20 -zZ0q --thp 1" 20.169 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.100 secs average thread-runtime 0.419 % difference between max/avg runtime 63.144 GB data processed, per thread 252.576 GB data processed, total 0.319 nsecs/byte/thread runtime 3.131 GB/sec/thread speed 12.523 GB/sec total speed # Running 8x1-bw-process, "perf bench numa mem -p 8 -t 1 -P 512 -s 20 -zZ0q --thp 1" 20.175 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.107 secs average thread-runtime 0.433 % difference between max/avg runtime 31.267 GB data processed, per thread 250.133 GB data processed, total 0.645 nsecs/byte/thread runtime 1.550 GB/sec/thread speed 12.398 GB/sec total speed # Running 8x1-bw-process-NOTHP, "perf bench numa mem -p 8 -t 1 -P 512 -s 20 -zZ0q --thp 1 --thp -1" 20.216 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.113 secs average thread-runtime 0.535 % difference between max/avg runtime 30.998 GB data processed, per thread 247.981 GB data processed, total 0.652 nsecs/byte/thread runtime 1.533 GB/sec/thread speed 12.266 GB/sec total speed # Running 16x1-bw-process, "perf bench numa mem -p 16 -t 1 -P 256 -s 20 -zZ0q --thp 1" 20.234 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.174 secs average thread-runtime 0.577 % difference between max/avg runtime 15.377 GB data processed, per thread 246.039 GB data processed, total 1.316 nsecs/byte/thread runtime 0.760 GB/sec/thread speed 12.160 GB/sec total speed # Running 1x4-bw-thread, "perf bench numa mem -p 1 -t 4 -T 256 -s 20 -zZ0q --thp 1" 20.040 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.028 secs average thread-runtime 0.099 % difference between max/avg runtime 66.832 GB data processed, per thread 267.328 GB data processed, total 0.300 nsecs/byte/thread runtime 3.335 GB/sec/thread speed 13.340 GB/sec total speed # Running 1x8-bw-thread, "perf bench numa mem -p 1 -t 8 -T 256 -s 20 -zZ0q --thp 1" 20.064 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.034 secs average thread-runtime 0.160 % difference between max/avg runtime 32.911 GB data processed, per thread 263.286 GB data processed, total 0.610 nsecs/byte/thread runtime 1.640 GB/sec/thread speed 13.122 GB/sec total speed # Running 1x16-bw-thread, "perf bench numa mem -p 1 -t 16 -T 128 -s 20 -zZ0q --thp 1" 20.092 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.052 secs average thread-runtime 0.230 % difference between max/avg runtime 16.131 GB data processed, per thread 258.088 GB data processed, total 1.246 nsecs/byte/thread runtime 0.803 GB/sec/thread speed 12.845 GB/sec total speed # Running 1x32-bw-thread, "perf bench numa mem -p 1 -t 32 -T 64 -s 20 -zZ0q --thp 1" 20.099 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.063 secs average thread-runtime 0.247 % difference between max/avg runtime 7.962 GB data processed, per thread 254.773 GB data processed, total 2.525 nsecs/byte/thread runtime 0.396 GB/sec/thread speed 12.676 GB/sec total speed # Running 2x3-bw-process, "perf bench numa mem -p 2 -t 3 -P 512 -s 20 -zZ0q --thp 1" 20.150 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.120 secs average thread-runtime 0.372 % difference between max/avg runtime 44.827 GB data processed, per thread 268.960 GB data processed, total 0.450 nsecs/byte/thread runtime 2.225 GB/sec/thread speed 13.348 GB/sec total speed # Running 4x4-bw-process, "perf bench numa mem -p 4 -t 4 -P 512 -s 20 -zZ0q --thp 1" 20.258 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.168 secs average thread-runtime 0.636 % difference between max/avg runtime 17.079 GB data processed, per thread 273.263 GB data processed, total 1.186 nsecs/byte/thread runtime 0.843 GB/sec/thread speed 13.489 GB/sec total speed # Running 4x6-bw-process, "perf bench numa mem -p 4 -t 6 -P 512 -s 20 -zZ0q --thp 1" 20.559 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.382 secs average thread-runtime 1.359 % difference between max/avg runtime 10.758 GB data processed, per thread 258.201 GB data processed, total 1.911 nsecs/byte/thread runtime 0.523 GB/sec/thread speed 12.559 GB/sec total speed # Running 4x8-bw-process, "perf bench numa mem -p 4 -t 8 -P 512 -s 20 -zZ0q --thp 1" 20.744 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.516 secs average thread-runtime 1.792 % difference between max/avg runtime 8.069 GB data processed, per thread 258.201 GB data processed, total 2.571 nsecs/byte/thread runtime 0.389 GB/sec/thread speed 12.447 GB/sec total speed # Running 4x8-bw-process-NOTHP, "perf bench numa mem -p 4 -t 8 -P 512 -s 20 -zZ0q --thp 1 --thp -1" 20.855 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.561 secs average thread-runtime 2.050 % difference between max/avg runtime 8.069 GB data processed, per thread 258.201 GB data processed, total 2.585 nsecs/byte/thread runtime 0.387 GB/sec/thread speed 12.381 GB/sec total speed # Running 3x3-bw-process, "perf bench numa mem -p 3 -t 3 -P 512 -s 20 -zZ0q --thp 1" 20.134 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.077 secs average thread-runtime 0.333 % difference between max/avg runtime 28.091 GB data processed, per thread 252.822 GB data processed, total 0.717 nsecs/byte/thread runtime 1.395 GB/sec/thread speed 12.557 GB/sec total speed # Running 5x5-bw-process, "perf bench numa mem -p 5 -t 5 -P 512 -s 20 -zZ0q --thp 1" 20.588 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.375 secs average thread-runtime 1.427 % difference between max/avg runtime 10.177 GB data processed, per thread 254.436 GB data processed, total 2.023 nsecs/byte/thread runtime 0.494 GB/sec/thread speed 12.359 GB/sec total speed # Running 2x16-bw-process, "perf bench numa mem -p 2 -t 16 -P 512 -s 20 -zZ0q --thp 1" 20.657 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.429 secs average thread-runtime 1.589 % difference between max/avg runtime 8.170 GB data processed, per thread 261.429 GB data processed, total 2.528 nsecs/byte/thread runtime 0.395 GB/sec/thread speed 12.656 GB/sec total speed # Running 1x32-bw-process, "perf bench numa mem -p 1 -t 32 -P 2048 -s 20 -zZ0q --thp 1" 22.981 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 21.996 secs average thread-runtime 6.486 % difference between max/avg runtime 8.863 GB data processed, per thread 283.606 GB data processed, total 2.593 nsecs/byte/thread runtime 0.386 GB/sec/thread speed 12.341 GB/sec total speed # Running numa02-bw, "perf bench numa mem -p 1 -t 32 -T 32 -s 20 -zZ0q --thp 1" 20.047 secs slowest (max) thread-runtime 19.000 secs fastest (min) thread-runtime 20.026 secs average thread-runtime 2.611 % difference between max/avg runtime 8.441 GB data processed, per thread 270.111 GB data processed, total 2.375 nsecs/byte/thread runtime 0.421 GB/sec/thread speed 13.474 GB/sec total speed # Running numa02-bw-NOTHP, "perf bench numa mem -p 1 -t 32 -T 32 -s 20 -zZ0q --thp 1 --thp -1" 20.088 secs slowest (max) thread-runtime 19.000 secs fastest (min) thread-runtime 20.025 secs average thread-runtime 2.709 % difference between max/avg runtime 8.411 GB data processed, per thread 269.142 GB data processed, total 2.388 nsecs/byte/thread runtime 0.419 GB/sec/thread speed 13.398 GB/sec total speed # Running numa01-bw-thread, "perf bench numa mem -p 2 -t 16 -T 192 -s 20 -zZ0q --thp 1" 20.293 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.175 secs average thread-runtime 0.721 % difference between max/avg runtime 7.918 GB data processed, per thread 253.374 GB data processed, total 2.563 nsecs/byte/thread runtime 0.390 GB/sec/thread speed 12.486 GB/sec total speed # Running numa01-bw-thread-NOTHP, "perf bench numa mem -p 2 -t 16 -T 192 -s 20 -zZ0q --thp 1 --thp -1" 20.411 secs slowest (max) thread-runtime 20.000 secs fastest (min) thread-runtime 20.226 secs average thread-runtime 1.006 % difference between max/avg runtime 7.931 GB data processed, per thread 253.778 GB data processed, total 2.574 nsecs/byte/thread runtime 0.389 GB/sec/thread speed 12.434 GB/sec total speed # Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20201012161611.366482-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 67 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 21 deletions(-) (limited to 'tools/perf/bench') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f85bceccc459..11726ec6285f 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -137,12 +137,13 @@ struct global_info { u8 *data; pthread_mutex_t startup_mutex; + pthread_cond_t startup_cond; int nr_tasks_started; - pthread_mutex_t startup_done_mutex; - pthread_mutex_t start_work_mutex; + pthread_cond_t start_work_cond; int nr_tasks_working; + bool start_work; pthread_mutex_t stop_work_mutex; u64 bytes_done; @@ -483,6 +484,18 @@ static void init_global_mutex(pthread_mutex_t *mutex) pthread_mutex_init(mutex, &attr); } +/* + * Return a process-shared (global) condition variable: + */ +static void init_global_cond(pthread_cond_t *cond) +{ + pthread_condattr_t attr; + + pthread_condattr_init(&attr); + pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + pthread_cond_init(cond, &attr); +} + static int parse_cpu_list(const char *arg) { p0.cpu_list_str = strdup(arg); @@ -1136,15 +1149,18 @@ static void *worker_thread(void *__tdata) if (g->p.serialize_startup) { pthread_mutex_lock(&g->startup_mutex); g->nr_tasks_started++; + /* The last thread wakes the main process. */ + if (g->nr_tasks_started == g->p.nr_tasks) + pthread_cond_signal(&g->startup_cond); + pthread_mutex_unlock(&g->startup_mutex); /* Here we will wait for the main process to start us all at once: */ pthread_mutex_lock(&g->start_work_mutex); + g->start_work = false; g->nr_tasks_working++; - - /* Last one wake the main process: */ - if (g->nr_tasks_working == g->p.nr_tasks) - pthread_mutex_unlock(&g->startup_done_mutex); + while (!g->start_work) + pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex); pthread_mutex_unlock(&g->start_work_mutex); } @@ -1441,8 +1457,9 @@ static int init(void) /* Startup serialization: */ init_global_mutex(&g->start_work_mutex); + init_global_cond(&g->start_work_cond); init_global_mutex(&g->startup_mutex); - init_global_mutex(&g->startup_done_mutex); + init_global_cond(&g->startup_cond); init_global_mutex(&g->stop_work_mutex); init_thread_data(); @@ -1502,9 +1519,6 @@ static int __bench_numa(const char *name) pids = zalloc(g->p.nr_proc * sizeof(*pids)); pid = -1; - /* All threads try to acquire it, this way we can wait for them to start up: */ - pthread_mutex_lock(&g->start_work_mutex); - if (g->p.serialize_startup) { tprintf(" #\n"); tprintf(" # Startup synchronization: ..."); fflush(stdout); @@ -1526,22 +1540,29 @@ static int __bench_numa(const char *name) pids[i] = pid; } - /* Wait for all the threads to start up: */ - while (g->nr_tasks_started != g->p.nr_tasks) - usleep(USEC_PER_MSEC); - - BUG_ON(g->nr_tasks_started != g->p.nr_tasks); if (g->p.serialize_startup) { + bool threads_ready = false; double startup_sec; - pthread_mutex_lock(&g->startup_done_mutex); + /* + * Wait for all the threads to start up. The last thread will + * signal this process. + */ + pthread_mutex_lock(&g->startup_mutex); + while (g->nr_tasks_started != g->p.nr_tasks) + pthread_cond_wait(&g->startup_cond, &g->startup_mutex); - /* This will start all threads: */ - pthread_mutex_unlock(&g->start_work_mutex); + pthread_mutex_unlock(&g->startup_mutex); - /* This mutex is locked - the last started thread will wake us: */ - pthread_mutex_lock(&g->startup_done_mutex); + /* Wait for all threads to be at the start_work_cond. */ + while (!threads_ready) { + pthread_mutex_lock(&g->start_work_mutex); + threads_ready = (g->nr_tasks_working == g->p.nr_tasks); + pthread_mutex_unlock(&g->start_work_mutex); + if (!threads_ready) + usleep(1); + } gettimeofday(&stop, NULL); @@ -1555,7 +1576,11 @@ static int __bench_numa(const char *name) tprintf(" #\n"); start = stop; - pthread_mutex_unlock(&g->startup_done_mutex); + /* Start all threads running. */ + pthread_mutex_lock(&g->start_work_mutex); + g->start_work = true; + pthread_mutex_unlock(&g->start_work_mutex); + pthread_cond_broadcast(&g->start_work_cond); } else { gettimeofday(&start, NULL); } -- cgit v1.2.3-70-g09d2