diff options
Diffstat (limited to 'tools/perf/builtin-script.c')
| -rw-r--r-- | tools/perf/builtin-script.c | 326 | 
1 files changed, 313 insertions, 13 deletions
| diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c0783b4f7b6c..d05aec491cff 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -21,13 +21,27 @@  #include "util/cpumap.h"  #include "util/thread_map.h"  #include "util/stat.h" +#include "util/string2.h"  #include "util/thread-stack.h"  #include "util/time-utils.h" +#include "print_binary.h"  #include <linux/bitmap.h> +#include <linux/kernel.h>  #include <linux/stringify.h>  #include <linux/time64.h>  #include "asm/bug.h"  #include "util/mem-events.h" +#include "util/dump-insn.h" +#include <dirent.h> +#include <errno.h> +#include <inttypes.h> +#include <signal.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "sane_ctype.h"  static char const		*script_name;  static char const		*generate_script_lang; @@ -42,6 +56,7 @@ static bool			nanosecs;  static const char		*cpu_list;  static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);  static struct perf_stat_config	stat_config; +static int			max_blocks;  unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -69,6 +84,7 @@ enum perf_output_field {  	PERF_OUTPUT_CALLINDENT	    = 1U << 20,  	PERF_OUTPUT_INSN	    = 1U << 21,  	PERF_OUTPUT_INSNLEN	    = 1U << 22, +	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,  };  struct output_option { @@ -98,6 +114,7 @@ struct output_option {  	{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},  	{.str = "insn", .field = PERF_OUTPUT_INSN},  	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, +	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},  };  /* default set to maintain compatibility with current format */ @@ -292,7 +309,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,  		       "selected. Hence, no address to lookup the source line number.\n");  		return -EINVAL;  	} - +	if (PRINT_FIELD(BRSTACKINSN) && +	    !(perf_evlist__combined_branch_type(session->evlist) & +	      PERF_SAMPLE_BRANCH_ANY)) { +		pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" +		       "Hint: run 'perf record -b ...'\n"); +		return -EINVAL; +	}  	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&  		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",  					PERF_OUTPUT_TID|PERF_OUTPUT_PID)) @@ -546,6 +569,233 @@ static void print_sample_brstacksym(struct perf_sample *sample,  	}  } +#define MAXBB 16384UL + +static int grab_bb(u8 *buffer, u64 start, u64 end, +		    struct machine *machine, struct thread *thread, +		    bool *is64bit, u8 *cpumode, bool last) +{ +	long offset, len; +	struct addr_location al; +	bool kernel; + +	if (!start || !end) +		return 0; + +	kernel = machine__kernel_ip(machine, start); +	if (kernel) +		*cpumode = PERF_RECORD_MISC_KERNEL; +	else +		*cpumode = PERF_RECORD_MISC_USER; + +	/* +	 * Block overlaps between kernel and user. +	 * This can happen due to ring filtering +	 * On Intel CPUs the entry into the kernel is filtered, +	 * but the exit is not. Let the caller patch it up. +	 */ +	if (kernel != machine__kernel_ip(machine, end)) { +		printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", +				start, end); +		return -ENXIO; +	} + +	memset(&al, 0, sizeof(al)); +	if (end - start > MAXBB - MAXINSN) { +		if (last) +			printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); +		else +			printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); +		return 0; +	} + +	thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); +	if (!al.map || !al.map->dso) { +		printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); +		return 0; +	} +	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { +		printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); +		return 0; +	} + +	/* Load maps to ensure dso->is_64_bit has been updated */ +	map__load(al.map); + +	offset = al.map->map_ip(al.map, start); +	len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer, +				    end - start + MAXINSN); + +	*is64bit = al.map->dso->is_64_bit; +	if (len <= 0) +		printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", +			start, end); +	return len; +} + +static void print_jump(uint64_t ip, struct branch_entry *en, +		       struct perf_insn *x, u8 *inbuf, int len, +		       int insn) +{ +	printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", +	       ip, +	       dump_insn(x, ip, inbuf, len, NULL), +	       en->flags.predicted ? " PRED" : "", +	       en->flags.mispred ? " MISPRED" : "", +	       en->flags.in_tx ? " INTX" : "", +	       en->flags.abort ? " ABORT" : ""); +	if (en->flags.cycles) { +		printf(" %d cycles", en->flags.cycles); +		if (insn) +			printf(" %.2f IPC", (float)insn / en->flags.cycles); +	} +	putchar('\n'); +} + +static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, +			 uint64_t addr, struct symbol **lastsym, +			 struct perf_event_attr *attr) +{ +	struct addr_location al; +	int off; + +	memset(&al, 0, sizeof(al)); + +	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); +	if (!al.map) +		thread__find_addr_map(thread, cpumode, MAP__VARIABLE, +				      addr, &al); +	if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) +		return; + +	al.cpu = cpu; +	al.sym = NULL; +	if (al.map) +		al.sym = map__find_symbol(al.map, al.addr); + +	if (!al.sym) +		return; + +	if (al.addr < al.sym->end) +		off = al.addr - al.sym->start; +	else +		off = al.addr - al.map->start - al.sym->start; +	printf("\t%s", al.sym->name); +	if (off) +		printf("%+d", off); +	putchar(':'); +	if (PRINT_FIELD(SRCLINE)) +		map__fprintf_srcline(al.map, al.addr, "\t", stdout); +	putchar('\n'); +	*lastsym = al.sym; +} + +static void print_sample_brstackinsn(struct perf_sample *sample, +				     struct thread *thread, +				     struct perf_event_attr *attr, +				     struct machine *machine) +{ +	struct branch_stack *br = sample->branch_stack; +	u64 start, end; +	int i, insn, len, nr, ilen; +	struct perf_insn x; +	u8 buffer[MAXBB]; +	unsigned off; +	struct symbol *lastsym = NULL; + +	if (!(br && br->nr)) +		return; +	nr = br->nr; +	if (max_blocks && nr > max_blocks + 1) +		nr = max_blocks + 1; + +	x.thread = thread; +	x.cpu = sample->cpu; + +	putchar('\n'); + +	/* Handle first from jump, of which we don't know the entry. */ +	len = grab_bb(buffer, br->entries[nr-1].from, +			br->entries[nr-1].from, +			machine, thread, &x.is64bit, &x.cpumode, false); +	if (len > 0) { +		print_ip_sym(thread, x.cpumode, x.cpu, +			     br->entries[nr - 1].from, &lastsym, attr); +		print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], +			    &x, buffer, len, 0); +	} + +	/* Print all blocks */ +	for (i = nr - 2; i >= 0; i--) { +		if (br->entries[i].from || br->entries[i].to) +			pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, +				 br->entries[i].from, +				 br->entries[i].to); +		start = br->entries[i + 1].to; +		end   = br->entries[i].from; + +		len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); +		/* Patch up missing kernel transfers due to ring filters */ +		if (len == -ENXIO && i > 0) { +			end = br->entries[--i].from; +			pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); +			len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); +		} +		if (len <= 0) +			continue; + +		insn = 0; +		for (off = 0;; off += ilen) { +			uint64_t ip = start + off; + +			print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); +			if (ip == end) { +				print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); +				break; +			} else { +				printf("\t%016" PRIx64 "\t%s\n", ip, +					dump_insn(&x, ip, buffer + off, len - off, &ilen)); +				if (ilen == 0) +					break; +				insn++; +			} +		} +	} + +	/* +	 * Hit the branch? In this case we are already done, and the target +	 * has not been executed yet. +	 */ +	if (br->entries[0].from == sample->ip) +		return; +	if (br->entries[0].flags.abort) +		return; + +	/* +	 * Print final block upto sample +	 */ +	start = br->entries[0].to; +	end = sample->ip; +	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); +	print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); +	if (len <= 0) { +		/* Print at least last IP if basic block did not work */ +		len = grab_bb(buffer, sample->ip, sample->ip, +			      machine, thread, &x.is64bit, &x.cpumode, false); +		if (len <= 0) +			return; + +		printf("\t%016" PRIx64 "\t%s\n", sample->ip, +			dump_insn(&x, sample->ip, buffer, len, NULL)); +		return; +	} +	for (off = 0; off <= end - start; off += ilen) { +		printf("\t%016" PRIx64 "\t%s\n", start + off, +			dump_insn(&x, start + off, buffer + off, len - off, &ilen)); +		if (ilen == 0) +			break; +	} +}  static void print_sample_addr(struct perf_sample *sample,  			  struct thread *thread, @@ -632,7 +882,9 @@ static void print_sample_callindent(struct perf_sample *sample,  }  static void print_insn(struct perf_sample *sample, -		       struct perf_event_attr *attr) +		       struct perf_event_attr *attr, +		       struct thread *thread, +		       struct machine *machine)  {  	if (PRINT_FIELD(INSNLEN))  		printf(" ilen: %d", sample->insn_len); @@ -643,12 +895,15 @@ static void print_insn(struct perf_sample *sample,  		for (i = 0; i < sample->insn_len; i++)  			printf(" %02x", (unsigned char)sample->insn[i]);  	} +	if (PRINT_FIELD(BRSTACKINSN)) +		print_sample_brstackinsn(sample, thread, attr, machine);  }  static void print_sample_bts(struct perf_sample *sample,  			     struct perf_evsel *evsel,  			     struct thread *thread, -			     struct addr_location *al) +			     struct addr_location *al, +			     struct machine *machine)  {  	struct perf_event_attr *attr = &evsel->attr;  	bool print_srcline_last = false; @@ -689,7 +944,7 @@ static void print_sample_bts(struct perf_sample *sample,  	if (print_srcline_last)  		map__fprintf_srcline(al->map, al->addr, "\n  ", stdout); -	print_insn(sample, attr); +	print_insn(sample, attr, thread, machine);  	printf("\n");  } @@ -830,6 +1085,7 @@ struct perf_script {  	bool			show_task_events;  	bool			show_mmap_events;  	bool			show_switch_events; +	bool			show_namespace_events;  	bool			allocated;  	struct cpu_map		*cpus;  	struct thread_map	*threads; @@ -871,7 +1127,8 @@ static size_t data_src__printf(u64 data_src)  static void process_event(struct perf_script *script,  			  struct perf_sample *sample, struct perf_evsel *evsel, -			  struct addr_location *al) +			  struct addr_location *al, +			  struct machine *machine)  {  	struct thread *thread = al->thread;  	struct perf_event_attr *attr = &evsel->attr; @@ -898,7 +1155,7 @@ static void process_event(struct perf_script *script,  		print_sample_flags(sample->flags);  	if (is_bts_event(attr)) { -		print_sample_bts(sample, evsel, thread, al); +		print_sample_bts(sample, evsel, thread, al, machine);  		return;  	} @@ -936,7 +1193,7 @@ static void process_event(struct perf_script *script,  	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))  		print_sample_bpf_output(sample); -	print_insn(sample, attr); +	print_insn(sample, attr, thread, machine);  	printf("\n");  } @@ -1046,7 +1303,7 @@ static int process_sample_event(struct perf_tool *tool,  	if (scripting_ops)  		scripting_ops->process_event(event, sample, evsel, &al);  	else -		process_event(scr, sample, evsel, &al); +		process_event(scr, sample, evsel, &al, machine);  out_put:  	addr_location__put(&al); @@ -1118,6 +1375,41 @@ out:  	return ret;  } +static int process_namespaces_event(struct perf_tool *tool, +				    union perf_event *event, +				    struct perf_sample *sample, +				    struct machine *machine) +{ +	struct thread *thread; +	struct perf_script *script = container_of(tool, struct perf_script, tool); +	struct perf_session *session = script->session; +	struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); +	int ret = -1; + +	thread = machine__findnew_thread(machine, event->namespaces.pid, +					 event->namespaces.tid); +	if (thread == NULL) { +		pr_debug("problem processing NAMESPACES event, skipping it.\n"); +		return -1; +	} + +	if (perf_event__process_namespaces(tool, event, sample, machine) < 0) +		goto out; + +	if (!evsel->attr.sample_id_all) { +		sample->cpu = 0; +		sample->time = 0; +		sample->tid = event->namespaces.tid; +		sample->pid = event->namespaces.pid; +	} +	print_sample_start(sample, thread, evsel); +	perf_event__fprintf(event, stdout); +	ret = 0; +out: +	thread__put(thread); +	return ret; +} +  static int process_fork_event(struct perf_tool *tool,  			      union perf_event *event,  			      struct perf_sample *sample, @@ -1293,6 +1585,8 @@ static int __cmd_script(struct perf_script *script)  	}  	if (script->show_switch_events)  		script->tool.context_switch = process_switch_event; +	if (script->show_namespace_events) +		script->tool.namespaces = process_namespaces_event;  	ret = perf_session__process_events(script->session); @@ -1427,7 +1721,7 @@ static int parse_scriptname(const struct option *opt __maybe_unused,  static int parse_output_fields(const struct option *opt __maybe_unused,  			    const char *arg, int unset __maybe_unused)  { -	char *tok; +	char *tok, *strtok_saveptr = NULL;  	int i, imax = ARRAY_SIZE(all_output_options);  	int j;  	int rc = 0; @@ -1488,7 +1782,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused,  		}  	} -	for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) { +	for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) {  		for (i = 0; i < imax; ++i) {  			if (strcmp(tok, all_output_options[i].str) == 0)  				break; @@ -2078,7 +2372,7 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused,  	return set_maps(script);  } -int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_script(int argc, const char **argv)  {  	bool show_full_info = false;  	bool header = false; @@ -2097,6 +2391,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)  			.mmap		 = perf_event__process_mmap,  			.mmap2		 = perf_event__process_mmap2,  			.comm		 = perf_event__process_comm, +			.namespaces	 = perf_event__process_namespaces,  			.exit		 = perf_event__process_exit,  			.fork		 = perf_event__process_fork,  			.attr		 = process_attr, @@ -2152,7 +2447,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)  		     "Valid types: hw,sw,trace,raw. "  		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"  		     "addr,symoff,period,iregs,brstack,brstacksym,flags," -		     "bpf-output,callindent,insn,insnlen", parse_output_fields), +		     "bpf-output,callindent,insn,insnlen,brstackinsn", +		     parse_output_fields),  	OPT_BOOLEAN('a', "all-cpus", &system_wide,  		    "system-wide collection from all CPUs"),  	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -2180,7 +2476,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)  		    "Show the mmap events"),  	OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,  		    "Show context switch events (if recorded)"), +	OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, +		    "Show namespace events (if recorded)"),  	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), +	OPT_INTEGER(0, "max-blocks", &max_blocks, +		    "Maximum number of code blocks to dump with brstackinsn"),  	OPT_BOOLEAN(0, "ns", &nanosecs,  		    "Use 9 decimal places when displaying time"),  	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", @@ -2217,7 +2517,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)  	if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {  		rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);  		if (!rec_script_path) -			return cmd_record(argc, argv, NULL); +			return cmd_record(argc, argv);  	}  	if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) { | 
