diff options
author | Ingo Molnar <mingo@kernel.org> | 2019-02-28 08:29:50 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2019-02-28 08:29:50 +0100 |
commit | c978b9460fe1d4a1e1effa0abd6bd69b18a098a8 (patch) | |
tree | eecc4c6179dea191c55ac8ef50467573b29a0b06 /tools/perf/util | |
parent | 0a1571243d3f150fa99c6f41f1b8e17a307a2b8b (diff) | |
parent | de667cce7f4f96b6e22da8fd9c065b961f355080 (diff) |
Merge tag 'perf-core-for-mingo-5.1-20190225' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
perf annotate:
Wei Li:
- Fix getting source line failure
perf script:
Andi Kleen:
- Handle missing fields with -F +...
perf data:
Jiri Olsa:
- Prep work to support per-cpu files in a directory.
Intel PT:
Adrian Hunter:
- Improve thread_stack__no_call_return()
- Hide x86 retpolines in thread stacks.
- exported SQL viewer refactorings, new 'top calls' report..
Alexander Shishkin:
- Copy parent's address filter offsets on clone
- Fix address filters for vmas with non-zero offset. Applies to
ARM's CoreSight as well.
python scripts:
Tony Jones:
- Python3 support for several 'perf script' python scripts.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- | tools/perf/util/annotate.c | 4 | ||||
-rw-r--r-- | tools/perf/util/data-convert-bt.c | 4 | ||||
-rw-r--r-- | tools/perf/util/data.c | 175 | ||||
-rw-r--r-- | tools/perf/util/data.h | 16 | ||||
-rw-r--r-- | tools/perf/util/header.c | 12 | ||||
-rw-r--r-- | tools/perf/util/thread-stack.c | 161 | ||||
-rw-r--r-- | tools/perf/util/util.c | 65 | ||||
-rw-r--r-- | tools/perf/util/util.h | 3 |
8 files changed, 398 insertions, 42 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2468b8aa0b6b..11a8a447a3af 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1891,6 +1891,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, struct annotation_options *options, struct arch **parch) { + struct annotation *notes = symbol__annotation(sym); struct annotate_args args = { .privsize = privsize, .evsel = evsel, @@ -1921,6 +1922,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, args.ms.map = map; args.ms.sym = sym; + notes->start = map__rip_2objdump(map, sym->start); return symbol__disassemble(sym, &args); } @@ -2796,8 +2798,6 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev symbol__calc_percent(sym, evsel); - notes->start = map__rip_2objdump(map, sym->start); - annotation__set_offsets(notes, size); annotation__mark_jump_targets(notes, sym); annotation__compute_ipc(notes, size); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 2a36fab76994..26af43ad9ddd 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, { struct perf_session *session; struct perf_data data = { - .file = { .path = input, .fd = -1 }, + .path = input, .mode = PERF_DATA_MODE_READ, .force = opts->force, }; @@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, fprintf(stderr, "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", - data.file.path, path); + data.path, path); fprintf(stderr, "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index d8cfc19ddb10..7bd5ddeb7a41 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -7,11 +7,117 @@ #include <fcntl.h> #include <unistd.h> #include <string.h> +#include <asm/bug.h> +#include <sys/types.h> +#include <dirent.h> #include "data.h" #include "util.h" #include "debug.h" +static void close_dir(struct perf_data_file *files, int nr) +{ + while (--nr >= 1) { + close(files[nr].fd); + free(files[nr].path); + } + free(files); +} + +void perf_data__close_dir(struct perf_data *data) +{ + close_dir(data->dir.files, data->dir.nr); +} + +int perf_data__create_dir(struct perf_data *data, int nr) +{ + struct perf_data_file *files = NULL; + int i, ret = -1; + + files = zalloc(nr * sizeof(*files)); + if (!files) + return -ENOMEM; + + data->dir.files = files; + data->dir.nr = nr; + + for (i = 0; i < nr; i++) { + struct perf_data_file *file = &files[i]; + + if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) + goto out_err; + + ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); + if (ret < 0) + goto out_err; + + file->fd = ret; + } + + return 0; + +out_err: + close_dir(files, i); + return ret; +} + +int perf_data__open_dir(struct perf_data *data) +{ + struct perf_data_file *files = NULL; + struct dirent *dent; + int ret = -1; + DIR *dir; + int nr = 0; + + dir = opendir(data->path); + if (!dir) + return -EINVAL; + + while ((dent = readdir(dir)) != NULL) { + struct perf_data_file *file; + char path[PATH_MAX]; + struct stat st; + + snprintf(path, sizeof(path), "%s/%s", data->path, dent->d_name); + if (stat(path, &st)) + continue; + + if (!S_ISREG(st.st_mode) || strncmp(dent->d_name, "data", 4)) + continue; + + ret = -ENOMEM; + + file = realloc(files, (nr + 1) * sizeof(*files)); + if (!file) + goto out_err; + + files = file; + file = &files[nr++]; + + file->path = strdup(path); + if (!file->path) + goto out_err; + + ret = open(file->path, O_RDONLY); + if (ret < 0) + goto out_err; + + file->fd = ret; + file->size = st.st_size; + } + + if (!files) + return -EINVAL; + + data->dir.files = files; + data->dir.nr = nr; + return 0; + +out_err: + close_dir(files, nr); + return ret; +} + static bool check_pipe(struct perf_data *data) { struct stat st; @@ -19,11 +125,11 @@ static bool check_pipe(struct perf_data *data) int fd = perf_data__is_read(data) ? STDIN_FILENO : STDOUT_FILENO; - if (!data->file.path) { + if (!data->path) { if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) is_pipe = true; } else { - if (!strcmp(data->file.path, "-")) + if (!strcmp(data->path, "-")) is_pipe = true; } @@ -37,13 +143,31 @@ static int check_backup(struct perf_data *data) { struct stat st; - if (!stat(data->file.path, &st) && st.st_size) { - /* TODO check errors properly */ + if (perf_data__is_read(data)) + return 0; + + if (!stat(data->path, &st) && st.st_size) { char oldname[PATH_MAX]; + int ret; + snprintf(oldname, sizeof(oldname), "%s.old", - data->file.path); - unlink(oldname); - rename(data->file.path, oldname); + data->path); + + ret = rm_rf_perf_data(oldname); + if (ret) { + pr_err("Can't remove old data: %s (%s)\n", + ret == -2 ? + "Unknown file found" : strerror(errno), + oldname); + return -1; + } + + if (rename(data->path, oldname)) { + pr_err("Can't move data: %s (%s to %s)\n", + strerror(errno), + data->path, oldname); + return -1; + } } return 0; @@ -82,7 +206,7 @@ static int open_file_read(struct perf_data *data) goto out_close; } - data->size = st.st_size; + data->file.size = st.st_size; return fd; out_close: @@ -95,9 +219,6 @@ static int open_file_write(struct perf_data *data) int fd; char sbuf[STRERR_BUFSIZE]; - if (check_backup(data)) - return -1; - fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, S_IRUSR|S_IWUSR); @@ -115,8 +236,22 @@ static int open_file(struct perf_data *data) fd = perf_data__is_read(data) ? open_file_read(data) : open_file_write(data); + if (fd < 0) { + free(data->file.path); + return -1; + } + data->file.fd = fd; - return fd < 0 ? -1 : 0; + return 0; +} + +static int open_file_dup(struct perf_data *data) +{ + data->file.path = strdup(data->path); + if (!data->file.path) + return -ENOMEM; + + return open_file(data); } int perf_data__open(struct perf_data *data) @@ -124,14 +259,18 @@ int perf_data__open(struct perf_data *data) if (check_pipe(data)) return 0; - if (!data->file.path) - data->file.path = "perf.data"; + if (!data->path) + data->path = "perf.data"; - return open_file(data); + if (check_backup(data)) + return -1; + + return open_file_dup(data); } void perf_data__close(struct perf_data *data) { + free(data->file.path); close(data->file.fd); } @@ -159,15 +298,15 @@ int perf_data__switch(struct perf_data *data, if (perf_data__is_read(data)) return -EINVAL; - if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0) + if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0) return -ENOMEM; /* * Only fire a warning, don't return error, continue fill * original file. */ - if (rename(data->file.path, new_filepath)) - pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath); + if (rename(data->path, new_filepath)) + pr_warning("Failed to rename %s to %s\n", data->path, new_filepath); if (!at_exit) { close(data->file.fd); diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 4828f7feea89..14b47be2bd69 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -10,16 +10,22 @@ enum perf_data_mode { }; struct perf_data_file { - const char *path; + char *path; int fd; + unsigned long size; }; struct perf_data { + const char *path; struct perf_data_file file; bool is_pipe; bool force; - unsigned long size; enum perf_data_mode mode; + + struct { + struct perf_data_file *files; + int nr; + } dir; }; static inline bool perf_data__is_read(struct perf_data *data) @@ -44,7 +50,7 @@ static inline int perf_data__fd(struct perf_data *data) static inline unsigned long perf_data__size(struct perf_data *data) { - return data->size; + return data->file.size; } int perf_data__open(struct perf_data *data); @@ -63,4 +69,8 @@ ssize_t perf_data_file__write(struct perf_data_file *file, int perf_data__switch(struct perf_data *data, const char *postfix, size_t pos, bool at_exit); + +int perf_data__create_dir(struct perf_data *data, int nr); +int perf_data__open_dir(struct perf_data *data); +void perf_data__close_dir(struct perf_data *data); #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a2323d777dae..01b324c275b9 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -527,17 +527,11 @@ static int write_event_desc(struct feat_fd *ff, static int write_cmdline(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { - char buf[MAXPATHLEN]; - u32 n; - int i, ret; + char pbuf[MAXPATHLEN], *buf; + int i, ret, n; /* actual path to perf binary */ - ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1); - if (ret <= 0) - return -1; - - /* readlink() does not add null termination */ - buf[ret] = '\0'; + buf = perf_exe(pbuf, MAXPATHLEN); /* account for binary path */ n = perf_env.nr_cmdline + 1; diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index f52c0f90915d..a8b45168513c 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -20,6 +20,7 @@ #include "thread.h" #include "event.h" #include "machine.h" +#include "env.h" #include "util.h" #include "debug.h" #include "symbol.h" @@ -29,6 +30,19 @@ #define STACK_GROWTH 2048 +/* + * State of retpoline detection. + * + * RETPOLINE_NONE: no retpoline detection + * X86_RETPOLINE_POSSIBLE: x86 retpoline possible + * X86_RETPOLINE_DETECTED: x86 retpoline detected + */ +enum retpoline_state_t { + RETPOLINE_NONE, + X86_RETPOLINE_POSSIBLE, + X86_RETPOLINE_DETECTED, +}; + /** * struct thread_stack_entry - thread stack entry. * @ret_addr: return address @@ -64,6 +78,7 @@ struct thread_stack_entry { * @crp: call/return processor * @comm: current comm * @arr_sz: size of array if this is the first element of an array + * @rstate: used to detect retpolines */ struct thread_stack { struct thread_stack_entry *stack; @@ -76,6 +91,7 @@ struct thread_stack { struct call_return_processor *crp; struct comm *comm; unsigned int arr_sz; + enum retpoline_state_t rstate; }; /* @@ -115,10 +131,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, if (err) return err; - if (thread->mg && thread->mg->machine) - ts->kernel_start = machine__kernel_start(thread->mg->machine); - else + if (thread->mg && thread->mg->machine) { + struct machine *machine = thread->mg->machine; + const char *arch = perf_env__arch(machine->env); + + ts->kernel_start = machine__kernel_start(machine); + if (!strcmp(arch, "x86")) + ts->rstate = X86_RETPOLINE_POSSIBLE; + } else { ts->kernel_start = 1ULL << 63; + } ts->crp = crp; return 0; @@ -638,14 +660,57 @@ static int thread_stack__no_call_return(struct thread *thread, else parent = root; - /* This 'return' had no 'call', so push and pop top of stack */ - cp = call_path__findnew(cpr, parent, fsym, ip, ks); + if (parent->sym == from_al->sym) { + /* + * At the bottom of the stack, assume the missing 'call' was + * before the trace started. So, pop the current symbol and push + * the 'to' symbol. + */ + if (ts->cnt == 1) { + err = thread_stack__call_return(thread, ts, --ts->cnt, + tm, ref, false); + if (err) + return err; + } + + if (!ts->cnt) { + cp = call_path__findnew(cpr, root, tsym, addr, ks); + + return thread_stack__push_cp(ts, addr, tm, ref, cp, + true, false); + } + + /* + * Otherwise assume the 'return' is being used as a jump (e.g. + * retpoline) and just push the 'to' symbol. + */ + cp = call_path__findnew(cpr, parent, tsym, addr, ks); + + err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); + if (!err) + ts->stack[ts->cnt - 1].non_call = true; + + return err; + } + + /* + * Assume 'parent' has not yet returned, so push 'to', and then push and + * pop 'from'. + */ + + cp = call_path__findnew(cpr, parent, tsym, addr, ks); err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); if (err) return err; - return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym); + cp = call_path__findnew(cpr, cp, fsym, ip, ks); + + err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); + if (err) + return err; + + return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); } static int thread_stack__trace_begin(struct thread *thread, @@ -690,6 +755,70 @@ static int thread_stack__trace_end(struct thread_stack *ts, false, true); } +static bool is_x86_retpoline(const char *name) +{ + const char *p = strstr(name, "__x86_indirect_thunk_"); + + return p == name || !strcmp(name, "__indirect_thunk_start"); +} + +/* + * x86 retpoline functions pollute the call graph. This function removes them. + * This does not handle function return thunks, nor is there any improvement + * for the handling of inline thunks or extern thunks. + */ +static int thread_stack__x86_retpoline(struct thread_stack *ts, + struct perf_sample *sample, + struct addr_location *to_al) +{ + struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; + struct call_path_root *cpr = ts->crp->cpr; + struct symbol *sym = tse->cp->sym; + struct symbol *tsym = to_al->sym; + struct call_path *cp; + + if (sym && is_x86_retpoline(sym->name)) { + /* + * This is a x86 retpoline fn. It pollutes the call graph by + * showing up everywhere there is an indirect branch, but does + * not itself mean anything. Here the top-of-stack is removed, + * by decrementing the stack count, and then further down, the + * resulting top-of-stack is replaced with the actual target. + * The result is that the retpoline functions will no longer + * appear in the call graph. Note this only affects the call + * graph, since all the original branches are left unchanged. + */ + ts->cnt -= 1; + sym = ts->stack[ts->cnt - 2].cp->sym; + if (sym && sym == tsym && to_al->addr != tsym->start) { + /* + * Target is back to the middle of the symbol we came + * from so assume it is an indirect jmp and forget it + * altogether. + */ + ts->cnt -= 1; + return 0; + } + } else if (sym && sym == tsym) { + /* + * Target is back to the symbol we came from so assume it is an + * indirect jmp and forget it altogether. + */ + ts->cnt -= 1; + return 0; + } + + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, + sample->addr, ts->kernel_start); + if (!cp) + return -ENOMEM; + + /* Replace the top-of-stack with the actual target */ + ts->stack[ts->cnt - 1].cp = cp; + + return 0; +} + int thread_stack__process(struct thread *thread, struct comm *comm, struct perf_sample *sample, struct addr_location *from_al, @@ -697,6 +826,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, struct call_return_processor *crp) { struct thread_stack *ts = thread__stack(thread, sample->cpu); + enum retpoline_state_t rstate; int err = 0; if (ts && !ts->crp) { @@ -712,6 +842,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->comm = comm; } + rstate = ts->rstate; + if (rstate == X86_RETPOLINE_DETECTED) + ts->rstate = X86_RETPOLINE_POSSIBLE; + /* Flush stack on exec */ if (ts->comm != comm && thread->pid_ == thread->tid) { err = __thread_stack__flush(thread, ts); @@ -748,10 +882,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->kernel_start); err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, false, trace_end); + + /* + * A call to the same symbol but not the start of the symbol, + * may be the start of a x86 retpoline. + */ + if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && + from_al->sym == to_al->sym && + to_al->addr != to_al->sym->start) + ts->rstate = X86_RETPOLINE_DETECTED; + } else if (sample->flags & PERF_IP_FLAG_RETURN) { if (!sample->ip || !sample->addr) return 0; + /* x86 retpoline 'return' doesn't match the stack */ + if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && + ts->stack[ts->cnt - 1].ret_addr != sample->addr) + return thread_stack__x86_retpoline(ts, sample, to_al); + err = thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, from_al->sym); if (err) { diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 3ee410fc047a..d388f80d8703 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -21,6 +21,7 @@ #include <linux/time64.h> #include <unistd.h> #include "strlist.h" +#include "string2.h" /* * XXX We need to find a better place for these things... @@ -117,7 +118,38 @@ int mkdir_p(char *path, mode_t mode) return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; } -int rm_rf(const char *path) +static bool match_pat(char *file, const char **pat) +{ + int i = 0; + + if (!pat) + return true; + + while (pat[i]) { + if (strglobmatch(file, pat[i])) + return true; + + i++; + } + + return false; +} + +/* + * The depth specify how deep the removal will go. + * 0 - will remove only files under the 'path' directory + * 1 .. x - will dive in x-level deep under the 'path' directory + * + * If specified the pat is array of string patterns ended with NULL, + * which are checked upon every file/directory found. Only matching + * ones are removed. + * + * The function returns: + * 0 on success + * -1 on removal failure with errno set + * -2 on pattern failure + */ +static int rm_rf_depth_pat(const char *path, int depth, const char **pat) { DIR *dir; int ret; @@ -144,6 +176,9 @@ int rm_rf(const char *path) if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) continue; + if (!match_pat(d->d_name, pat)) + return -2; + scnprintf(namebuf, sizeof(namebuf), "%s/%s", path, d->d_name); @@ -155,7 +190,7 @@ int rm_rf(const char *path) } if (S_ISDIR(statbuf.st_mode)) - ret = rm_rf(namebuf); + ret = depth ? rm_rf_depth_pat(namebuf, depth - 1, pat) : 0; else ret = unlink(namebuf); } @@ -167,6 +202,22 @@ int rm_rf(const char *path) return rmdir(path); } +int rm_rf_perf_data(const char *path) +{ + const char *pat[] = { + "header", + "data.*", + NULL, + }; + + return rm_rf_depth_pat(path, 0, pat); +} + +int rm_rf(const char *path) +{ + return rm_rf_depth_pat(path, INT_MAX, NULL); +} + /* A filter which removes dot files */ bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d) { @@ -517,3 +568,13 @@ out: return tip; } + +char *perf_exe(char *buf, int len) +{ + int n = readlink("/proc/self/exe", buf, len); + if (n > 0) { + buf[n] = 0; + return buf; + } + return strcpy(buf, "perf"); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ece040b799f6..09c1b0f91f65 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -31,6 +31,7 @@ struct strlist; int mkdir_p(char *path, mode_t mode); int rm_rf(const char *path); +int rm_rf_perf_data(const char *path); struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); @@ -76,6 +77,8 @@ extern bool perf_singlethreaded; void perf_set_singlethreaded(void); void perf_set_multithreaded(void); +char *perf_exe(char *buf, int len); + #ifndef O_CLOEXEC #ifdef __sparc__ #define O_CLOEXEC 0x400000 |