Diffstat (limited to 'tools/perf/bench')
 tools/perf/bench/Build        |   1 +
 tools/perf/bench/bench.h      |   2 ++
 tools/perf/bench/breakpoint.c | 244 ++++++++++++++++++++++++++++++++++++
 tools/perf/bench/numa.c       | 140 ++++++++++++++++++-----------
 4 files changed, 351 insertions(+), 36 deletions(-)
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 61d45fcb4057..6b6155a8ad09 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -14,6 +14,7 @@ perf-y += kallsyms-parse.o
perf-y += find-bit-bench.o
perf-y += inject-buildid.o
perf-y += evlist-open-close.o
+perf-y += breakpoint.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index b3480bc33fe8..6cefb4315d75 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -49,6 +49,8 @@ int bench_synthesize(int argc, const char **argv);
int bench_kallsyms_parse(int argc, const char **argv);
int bench_inject_build_id(int argc, const char **argv);
int bench_evlist_open_close(int argc, const char **argv);
+int bench_breakpoint_thread(int argc, const char **argv);
+int bench_breakpoint_enable(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/breakpoint.c b/tools/perf/bench/breakpoint.c
new file mode 100644
index 000000000000..41385f89ffc7
--- /dev/null
+++ b/tools/perf/bench/breakpoint.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <linux/time64.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+
+struct {
+ unsigned int nbreakpoints;
+ unsigned int nparallel;
+ unsigned int nthreads;
+} thread_params = {
+ .nbreakpoints = 1,
+ .nparallel = 1,
+ .nthreads = 1,
+};
+
+static const struct option thread_options[] = {
+ OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints,
+ "Specify number of breakpoints"),
+ OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify degree of parallelism"),
+ OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify number of threads"),
+ OPT_END()
+};
+
+static const char * const thread_usage[] = {
+ "perf bench breakpoint thread <options>",
+ NULL
+};
+
+struct breakpoint {
+ int fd;
+ char watched;
+};
+
+static int breakpoint_setup(void *addr)
+{
+ struct perf_event_attr attr = { .size = 0, };
+
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.size = sizeof(attr);
+ attr.inherit = 1;
+ attr.exclude_kernel = 1;
+ attr.exclude_hv = 1;
+ attr.bp_addr = (unsigned long)addr;
+ attr.bp_type = HW_BREAKPOINT_RW;
+ attr.bp_len = HW_BREAKPOINT_LEN_1;
+ return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+}
+
+static void *passive_thread(void *arg)
+{
+ unsigned int *done = (unsigned int *)arg;
+
+ while (!__atomic_load_n(done, __ATOMIC_RELAXED))
+ futex_wait(done, 0, NULL, 0);
+ return NULL;
+}
+
+static void *active_thread(void *arg)
+{
+ unsigned int *done = (unsigned int *)arg;
+
+ while (!__atomic_load_n(done, __ATOMIC_RELAXED));
+ return NULL;
+}
+
+static void *breakpoint_thread(void *arg)
+{
+ unsigned int i, done;
+ int *repeat = (int *)arg;
+ pthread_t *threads;
+
+ threads = calloc(thread_params.nthreads, sizeof(threads[0]));
+ if (!threads)
+ exit((perror("calloc"), EXIT_FAILURE));
+
+ while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) {
+ done = 0;
+ for (i = 0; i < thread_params.nthreads; i++) {
+ if (pthread_create(&threads[i], NULL, passive_thread, &done))
+ exit((perror("pthread_create"), EXIT_FAILURE));
+ }
+ __atomic_store_n(&done, 1, __ATOMIC_RELAXED);
+ futex_wake(&done, thread_params.nthreads, 0);
+ for (i = 0; i < thread_params.nthreads; i++)
+ pthread_join(threads[i], NULL);
+ }
+ free(threads);
+ return NULL;
+}
+
+// The benchmark creates nbreakpoints inheritable breakpoints,
+// then starts nparallel threads which create and join bench_repeat batches of nthreads threads.
+int bench_breakpoint_thread(int argc, const char **argv)
+{
+ unsigned int i, result_usec;
+ int repeat = bench_repeat;
+ struct breakpoint *breakpoints;
+ pthread_t *parallel;
+ struct timeval start, stop, diff;
+
+ if (parse_options(argc, argv, thread_options, thread_usage, 0)) {
+ usage_with_options(thread_usage, thread_options);
+ exit(EXIT_FAILURE);
+ }
+ breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0]));
+ parallel = calloc(thread_params.nparallel, sizeof(parallel[0]));
+ if (!breakpoints || !parallel)
+ exit((perror("calloc"), EXIT_FAILURE));
+
+ for (i = 0; i < thread_params.nbreakpoints; i++) {
+ breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
+ if (breakpoints[i].fd == -1)
+ exit((perror("perf_event_open"), EXIT_FAILURE));
+ }
+ gettimeofday(&start, NULL);
+ for (i = 0; i < thread_params.nparallel; i++) {
+ if (pthread_create(&parallel[i], NULL, breakpoint_thread, &repeat))
+ exit((perror("pthread_create"), EXIT_FAILURE));
+ }
+ for (i = 0; i < thread_params.nparallel; i++)
+ pthread_join(parallel[i], NULL);
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+ for (i = 0; i < thread_params.nbreakpoints; i++)
+ close(breakpoints[i].fd);
+ free(parallel);
+ free(breakpoints);
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n",
+ bench_repeat, thread_params.nbreakpoints, thread_params.nparallel);
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / bench_repeat / thread_params.nthreads);
+ printf(" %14lf usecs/op/cpu\n",
+ (double)result_usec / bench_repeat /
+ thread_params.nthreads * thread_params.nparallel);
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ break;
+ default:
+ fprintf(stderr, "Unknown format: %d\n", bench_format);
+ exit(EXIT_FAILURE);
+ }
+ return 0;
+}
+
+struct {
+ unsigned int npassive;
+ unsigned int nactive;
+} enable_params = {
+ .nactive = 0,
+ .npassive = 0,
+};
+
+static const struct option enable_options[] = {
+ OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify number of passive threads"),
+ OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify number of active threads"),
+ OPT_END()
+};
+
+static const char * const enable_usage[] = {
+ "perf bench breakpoint enable <options>",
+ NULL
+};
+
+// The benchmark creates an inheritable breakpoint,
+// starts npassive threads that block and nactive threads that spin,
+// then disables and enables the breakpoint bench_repeat times.
+int bench_breakpoint_enable(int argc, const char **argv)
+{
+ unsigned int i, nthreads, result_usec, done = 0;
+ char watched;
+ int fd;
+ pthread_t *threads;
+ struct timeval start, stop, diff;
+
+ if (parse_options(argc, argv, enable_options, enable_usage, 0)) {
+ usage_with_options(enable_usage, enable_options);
+ exit(EXIT_FAILURE);
+ }
+ fd = breakpoint_setup(&watched);
+ if (fd == -1)
+ exit((perror("perf_event_open"), EXIT_FAILURE));
+ nthreads = enable_params.npassive + enable_params.nactive;
+ threads = calloc(nthreads, sizeof(threads[0]));
+ if (!threads)
+ exit((perror("calloc"), EXIT_FAILURE));
+
+ for (i = 0; i < nthreads; i++) {
+ if (pthread_create(&threads[i], NULL,
+ i < enable_params.npassive ? passive_thread : active_thread, &done))
+ exit((perror("pthread_create"), EXIT_FAILURE));
+ }
+ usleep(10000); // let the threads block
+ gettimeofday(&start, NULL);
+ for (i = 0; i < bench_repeat; i++) {
+ if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
+ exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE));
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0))
+ exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE));
+ }
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+ __atomic_store_n(&done, 1, __ATOMIC_RELAXED);
+ futex_wake(&done, enable_params.npassive, 0);
+ for (i = 0; i < nthreads; i++)
+ pthread_join(threads[i], NULL);
+ free(threads);
+ close(fd);
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n",
+ bench_repeat, enable_params.npassive, enable_params.nactive);
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat);
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
+ break;
+ default:
+ fprintf(stderr, "Unknown format: %d\n", bench_format);
+ exit(EXIT_FAILURE);
+ }
+ return 0;
+}
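
Both subcommands time the same underlying kernel operation: creating a
hardware breakpoint event with perf_event_open() and flipping it with the
PERF_EVENT_IOC_DISABLE/PERF_EVENT_IOC_ENABLE ioctls (bench_repeat comes from
perf bench's global -r/--repeat option, so typical runs look like
"perf bench breakpoint thread -b 2 -p 64 -t 4" and
"perf bench breakpoint enable -p 64 -a 2"). Below is a minimal standalone
sketch of that mechanism with the benchmark scaffolding stripped away; it is
illustration only, not part of the patch, and may need perf_event_paranoid
relaxed or CAP_PERFMON to run:

#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	char watched;		/* the byte the breakpoint watches */
	int fd, i;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.size = sizeof(attr);
	attr.inherit = 1;	/* threads created later inherit the event */
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.bp_addr = (unsigned long)&watched;
	attr.bp_type = HW_BREAKPOINT_RW;	/* fire on reads and writes */
	attr.bp_len = HW_BREAKPOINT_LEN_1;

	/* pid 0 = this task, cpu -1 = any CPU, no group fd, no flags */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd == -1)
		exit((perror("perf_event_open"), EXIT_FAILURE));

	/* the enable benchmark times exactly this ioctl pair in a loop */
	for (i = 0; i < 1000; i++) {
		if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) ||
		    ioctl(fd, PERF_EVENT_IOC_ENABLE, 0))
			exit((perror("ioctl"), EXIT_FAILURE));
	}
	close(fd);
	return 0;
}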
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f2640179ada9..20eed1e53f80 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -34,6 +34,7 @@
#include <linux/numa.h>
#include <linux/zalloc.h>
+#include "../util/header.h"
#include <numa.h>
#include <numaif.h>
@@ -54,7 +55,7 @@
struct thread_data {
int curr_cpu;
- cpu_set_t bind_cpumask;
+ cpu_set_t *bind_cpumask;
int bind_node;
u8 *process_data;
int process_nr;
@@ -266,71 +267,117 @@ static bool node_has_cpus(int node)
return ret;
}
-static cpu_set_t bind_to_cpu(int target_cpu)
+static cpu_set_t *bind_to_cpu(int target_cpu)
{
- cpu_set_t orig_mask, mask;
- int ret;
+ int nrcpus = numa_num_possible_cpus();
+ cpu_set_t *orig_mask, *mask;
+ size_t size;
- ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
- BUG_ON(ret);
+ orig_mask = CPU_ALLOC(nrcpus);
+ BUG_ON(!orig_mask);
+ size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(size, orig_mask);
+
+ if (sched_getaffinity(0, size, orig_mask))
+ goto err_out;
- CPU_ZERO(&mask);
+ mask = CPU_ALLOC(nrcpus);
+ if (!mask)
+ goto err_out;
+
+ CPU_ZERO_S(size, mask);
if (target_cpu == -1) {
int cpu;
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
} else {
- BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
- CPU_SET(target_cpu, &mask);
+ if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
+ goto err;
+
+ CPU_SET_S(target_cpu, size, mask);
}
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ if (sched_setaffinity(0, size, mask))
+ goto err;
return orig_mask;
+
+err:
+ CPU_FREE(mask);
+err_out:
+ CPU_FREE(orig_mask);
+
+ /* BUG_ON due to failure in allocation of orig_mask/mask */
+ BUG_ON(-1);
+ return NULL;
}
-static cpu_set_t bind_to_node(int target_node)
+static cpu_set_t *bind_to_node(int target_node)
{
- cpu_set_t orig_mask, mask;
+ int nrcpus = numa_num_possible_cpus();
+ size_t size;
+ cpu_set_t *orig_mask, *mask;
int cpu;
- int ret;
- ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
- BUG_ON(ret);
+ orig_mask = CPU_ALLOC(nrcpus);
+ BUG_ON(!orig_mask);
+ size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(size, orig_mask);
- CPU_ZERO(&mask);
+ if (sched_getaffinity(0, size, orig_mask))
+ goto err_out;
+
+ mask = CPU_ALLOC(nrcpus);
+ if (!mask)
+ goto err_out;
+
+ CPU_ZERO_S(size, mask);
if (target_node == NUMA_NO_NODE) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
} else {
struct bitmask *cpumask = numa_allocate_cpumask();
- BUG_ON(!cpumask);
+ if (!cpumask)
+ goto err;
+
if (!numa_node_to_cpus(target_node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu))
- CPU_SET(cpu, &mask);
+ CPU_SET_S(cpu, size, mask);
}
}
numa_free_cpumask(cpumask);
}
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ if (sched_setaffinity(0, size, mask))
+ goto err;
return orig_mask;
+
+err:
+ CPU_FREE(mask);
+err_out:
+ CPU_FREE(orig_mask);
+
+ /* BUG_ON due to failure in allocation of orig_mask/mask */
+ BUG_ON(-1);
+ return NULL;
}
-static void bind_to_cpumask(cpu_set_t mask)
+static void bind_to_cpumask(cpu_set_t *mask)
{
int ret;
+ size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());
- ret = sched_setaffinity(0, sizeof(mask), &mask);
- BUG_ON(ret);
+ ret = sched_setaffinity(0, size, mask);
+ if (ret) {
+ CPU_FREE(mask);
+ BUG_ON(ret);
+ }
}
static void mempol_restore(void)
@@ -376,7 +423,7 @@ do { \
static u8 *alloc_data(ssize_t bytes0, int map_flags,
int init_zero, int init_cpu0, int thp, int init_random)
{
- cpu_set_t orig_mask;
+ cpu_set_t *orig_mask = NULL;
ssize_t bytes;
u8 *buf;
int ret;
@@ -434,6 +481,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
/* Restore affinity: */
if (init_cpu0) {
bind_to_cpumask(orig_mask);
+ CPU_FREE(orig_mask);
mempol_restore();
}
@@ -585,10 +633,16 @@ static int parse_setup_cpu_list(void)
return -1;
}
+ if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
+ printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
+ return -1;
+ }
+
BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
BUG_ON(bind_cpu_0 > bind_cpu_1);
for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+ size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int i;
for (i = 0; i < mul; i++) {
@@ -608,10 +662,15 @@ static int parse_setup_cpu_list(void)
tprintf("%2d", bind_cpu);
}
- CPU_ZERO(&td->bind_cpumask);
+ td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+ BUG_ON(!td->bind_cpumask);
+ CPU_ZERO_S(size, td->bind_cpumask);
for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
- BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
- CPU_SET(cpu, &td->bind_cpumask);
+ if (cpu < 0 || cpu >= g->p.nr_cpus) {
+ CPU_FREE(td->bind_cpumask);
+ BUG_ON(-1);
+ }
+ CPU_SET_S(cpu, size, td->bind_cpumask);
}
t++;
}
@@ -752,8 +811,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return parse_node_list(arg);
}
-#define BIT(x) (1ul << x)
-
static inline uint32_t lfsr_32(uint32_t lfsr)
{
const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
@@ -1241,7 +1298,7 @@ static void *worker_thread(void *__tdata)
* by migrating to CPU#0:
*/
if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
- cpu_set_t orig_mask;
+ cpu_set_t *orig_mask;
int target_cpu;
int this_cpu;
@@ -1265,6 +1322,7 @@ static void *worker_thread(void *__tdata)
printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
bind_to_cpumask(orig_mask);
+ CPU_FREE(orig_mask);
}
if (details >= 3) {
@@ -1398,21 +1456,31 @@ static void init_thread_data(void)
for (t = 0; t < g->p.nr_tasks; t++) {
struct thread_data *td = g->threads + t;
+ size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int cpu;
/* Allow all nodes by default: */
td->bind_node = NUMA_NO_NODE;
/* Allow all CPUs by default: */
- CPU_ZERO(&td->bind_cpumask);
+ td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+ BUG_ON(!td->bind_cpumask);
+ CPU_ZERO_S(cpuset_size, td->bind_cpumask);
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
- CPU_SET(cpu, &td->bind_cpumask);
+ CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
}
}
static void deinit_thread_data(void)
{
ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+ int t;
+
+ /* Free the bind_cpumask allocated for thread_data */
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ CPU_FREE(td->bind_cpumask);
+ }
free_data(g->threads, size);
}
@@ -1672,7 +1740,7 @@ static int __bench_numa(const char *name)
"GB/sec,", "total-speed", "GB/sec total speed");
if (g->p.show_details >= 2) {
- char tname[14 + 2 * 10 + 1];
+ char tname[14 + 2 * 11 + 1];
struct thread_data *td;
for (p = 0; p < g->p.nr_proc; p++) {
for (t = 0; t < g->p.nr_threads; t++) {
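
The numa.c changes above all follow one pattern: the fixed-size, on-stack
cpu_set_t (glibc caps it at 1024 CPUs) is replaced with a heap-allocated set
sized for all possible CPUs, and every CPU_* macro and sched_*affinity() call
is switched to the _S variants that take the allocated size. A minimal sketch
of that pattern, outside the patch; nrcpus is hard-coded here as a stand-in
for the numa_num_possible_cpus() value the benchmark uses:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int nrcpus = 2048;	/* stand-in for numa_num_possible_cpus() */
	size_t size = CPU_ALLOC_SIZE(nrcpus);
	cpu_set_t *mask = CPU_ALLOC(nrcpus);

	if (!mask)
		exit((perror("CPU_ALLOC"), EXIT_FAILURE));

	CPU_ZERO_S(size, mask);
	CPU_SET_S(0, size, mask);	/* pin the caller to CPU 0 */

	/* pass the allocated size, never sizeof(cpu_set_t) */
	if (sched_setaffinity(0, size, mask)) {
		CPU_FREE(mask);
		exit((perror("sched_setaffinity"), EXIT_FAILURE));
	}

	CPU_FREE(mask);
	return 0;
}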