diff options
Diffstat (limited to 'tools/perf/util/evsel.c')
| -rw-r--r-- | tools/perf/util/evsel.c | 884 | 
1 files changed, 523 insertions, 361 deletions
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbf9c8cee3c5..f745723d486b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -5,12 +5,16 @@   * Parts came from builtin-{top,stat,record}.c, see those files for further   * copyright notes.   */ +/* + * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select + * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. + */ +#define __SANE_USERSPACE_TYPES__  #include <byteswap.h>  #include <errno.h>  #include <inttypes.h>  #include <linux/bitops.h> -#include <api/io.h>  #include <api/fs/fs.h>  #include <api/fs/tracing_path.h>  #include <linux/hw_breakpoint.h> @@ -20,6 +24,7 @@  #include <linux/zalloc.h>  #include <sys/ioctl.h>  #include <sys/resource.h> +#include <sys/syscall.h>  #include <sys/types.h>  #include <dirent.h>  #include <stdlib.h> @@ -51,6 +56,8 @@  #include "off_cpu.h"  #include "pmu.h"  #include "pmus.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h"  #include "rlimit.h"  #include "../perf-sys.h"  #include "util/parse-branch-options.h" @@ -64,46 +71,135 @@  #include <linux/ctype.h>  #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h>  #endif  struct perf_missing_features perf_missing_features;  static clockid_t clockid; -static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = { -	NULL, -	"duration_time", -	"user_time", -	"system_time", -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev) +static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)  { -	if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX) -		return perf_tool_event__tool_names[ev]; - -	return NULL; +	return 0;  } -enum perf_tool_event perf_tool_event__from_str(const char *str) +static bool test_attr__enabled(void)  { -	int i; +	static bool test_attr__enabled; +	static bool test_attr__enabled_tested; -	perf_tool_event__for_each_event(i) { -		if (!strcmp(str, perf_tool_event__tool_names[i])) -			return i; +	if (!test_attr__enabled_tested) { +		char *dir = getenv("PERF_TEST_ATTR"); + +		test_attr__enabled = (dir != NULL); +		test_attr__enabled_tested = true;  	} -	return PERF_TOOL_NONE; +	return test_attr__enabled;  } +#define __WRITE_ASS(str, fmt, data)					\ +do {									\ +	if (fprintf(file, #str "=%"fmt "\n", data) < 0) {		\ +		perror("test attr - failed to write event file");	\ +		fclose(file);						\ +		return -1;						\ +	}								\ +} while (0) -static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) +#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) + +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, +		       int fd, int group_fd, unsigned long flags)  { +	FILE *file; +	char path[PATH_MAX]; +	char *dir = getenv("PERF_TEST_ATTR"); + +	snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, +		 attr->type, attr->config, fd); + +	file = fopen(path, "w+"); +	if (!file) { +		perror("test attr - failed to open event file"); +		return -1; +	} + +	if (fprintf(file, "[event-%d-%llu-%d]\n", +		    attr->type, attr->config, fd) < 0) { +		perror("test attr - failed to write event file"); +		fclose(file); +		return -1; +	} + +	/* syscall arguments */ +	__WRITE_ASS(fd,       "d", fd); +	__WRITE_ASS(group_fd, "d", group_fd); +	__WRITE_ASS(cpu,      "d", cpu.cpu); +	__WRITE_ASS(pid,      "d", pid); +	__WRITE_ASS(flags,   "lu", flags); + +	/* struct perf_event_attr */ +	WRITE_ASS(type,   PRIu32); +	WRITE_ASS(size,   PRIu32); +	WRITE_ASS(config,  "llu"); +	WRITE_ASS(sample_period, "llu"); +	WRITE_ASS(sample_type,   "llu"); +	WRITE_ASS(read_format,   "llu"); +	WRITE_ASS(disabled,       "d"); +	WRITE_ASS(inherit,        "d"); +	WRITE_ASS(pinned,         "d"); +	WRITE_ASS(exclusive,      "d"); +	WRITE_ASS(exclude_user,   "d"); +	WRITE_ASS(exclude_kernel, "d"); +	WRITE_ASS(exclude_hv,     "d"); +	WRITE_ASS(exclude_idle,   "d"); +	WRITE_ASS(mmap,           "d"); +	WRITE_ASS(comm,           "d"); +	WRITE_ASS(freq,           "d"); +	WRITE_ASS(inherit_stat,   "d"); +	WRITE_ASS(enable_on_exec, "d"); +	WRITE_ASS(task,           "d"); +	WRITE_ASS(watermark,      "d"); +	WRITE_ASS(precise_ip,     "d"); +	WRITE_ASS(mmap_data,      "d"); +	WRITE_ASS(sample_id_all,  "d"); +	WRITE_ASS(exclude_host,   "d"); +	WRITE_ASS(exclude_guest,  "d"); +	WRITE_ASS(exclude_callchain_kernel, "d"); +	WRITE_ASS(exclude_callchain_user, "d"); +	WRITE_ASS(mmap2,	  "d"); +	WRITE_ASS(comm_exec,	  "d"); +	WRITE_ASS(context_switch, "d"); +	WRITE_ASS(write_backward, "d"); +	WRITE_ASS(namespaces,	  "d"); +	WRITE_ASS(use_clockid,    "d"); +	WRITE_ASS(wakeup_events, PRIu32); +	WRITE_ASS(bp_type, PRIu32); +	WRITE_ASS(config1, "llu"); +	WRITE_ASS(config2, "llu"); +	WRITE_ASS(branch_sample_type, "llu"); +	WRITE_ASS(sample_regs_user,   "llu"); +	WRITE_ASS(sample_stack_user,  PRIu32); + +	fclose(file);  	return 0;  } -void __weak test_attr__ready(void) { } +#undef __WRITE_ASS +#undef WRITE_ASS + +static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, +		     int fd, int group_fd, unsigned long flags) +{ +	int errno_saved = errno; + +	if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { +		pr_err("test attr FAILED"); +		exit(128); +	} + +	errno = errno_saved; +}  static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)  { @@ -296,9 +392,9 @@ void evsel__init(struct evsel *evsel,  	evsel->metric_events = NULL;  	evsel->per_pkg_mask  = NULL;  	evsel->collect_stat  = false; -	evsel->pmu_name      = NULL;  	evsel->group_pmu_name = NULL;  	evsel->skippable     = false; +	evsel->alternate_hw_config = PERF_COUNT_HW_MAX;  }  struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -393,11 +489,6 @@ struct evsel *evsel__clone(struct evsel *orig)  		if (evsel->group_name == NULL)  			goto out_err;  	} -	if (orig->pmu_name) { -		evsel->pmu_name = strdup(orig->pmu_name); -		if (evsel->pmu_name == NULL) -			goto out_err; -	}  	if (orig->group_pmu_name) {  		evsel->group_pmu_name = strdup(orig->group_pmu_name);  		if (evsel->group_pmu_name == NULL) @@ -421,7 +512,6 @@ struct evsel *evsel__clone(struct evsel *orig)  	evsel->core.leader = orig->core.leader;  	evsel->max_events = orig->max_events; -	evsel->tool_event = orig->tool_event;  	free((char *)evsel->unit);  	evsel->unit = strdup(orig->unit);  	if (evsel->unit == NULL) @@ -445,6 +535,8 @@ struct evsel *evsel__clone(struct evsel *orig)  	if (evsel__copy_config_terms(evsel, orig) < 0)  		goto out_err; +	evsel->alternate_hw_config = orig->alternate_hw_config; +  	return evsel;  out_err: @@ -548,7 +640,6 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)  {  	int colon = 0, r = 0;  	struct perf_event_attr *attr = &evsel->core.attr; -	bool exclude_guest_default = false;  #define MOD_PRINT(context, mod)	do {					\  		if (!attr->exclude_##context) {				\ @@ -560,17 +651,15 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)  		MOD_PRINT(kernel, 'k');  		MOD_PRINT(user, 'u');  		MOD_PRINT(hv, 'h'); -		exclude_guest_default = true;  	}  	if (attr->precise_ip) {  		if (!colon)  			colon = ++r;  		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); -		exclude_guest_default = true;  	} -	if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) { +	if (attr->exclude_host || attr->exclude_guest) {  		MOD_PRINT(host, 'H');  		MOD_PRINT(guest, 'G');  	} @@ -617,11 +706,6 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)  	return r + evsel__add_modifiers(evsel, bf + r, size - r);  } -static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size) -{ -	return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev)); -} -  static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)  {  	int r; @@ -772,10 +856,7 @@ const char *evsel__name(struct evsel *evsel)  		break;  	case PERF_TYPE_SOFTWARE: -		if (evsel__is_tool(evsel)) -			evsel__tool_name(evsel__tool_event(evsel), bf, sizeof(bf)); -		else -			evsel__sw_name(evsel, bf, sizeof(bf)); +		evsel__sw_name(evsel, bf, sizeof(bf));  		break;  	case PERF_TYPE_TRACEPOINT: @@ -786,6 +867,10 @@ const char *evsel__name(struct evsel *evsel)  		evsel__bp_name(evsel, bf, sizeof(bf));  		break; +	case PERF_PMU_TYPE_TOOL: +		scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel)); +		break; +  	default:  		scnprintf(bf, sizeof(bf), "unknown attr type: %d",  			  evsel->core.attr.type); @@ -811,7 +896,7 @@ const char *evsel__metric_id(const struct evsel *evsel)  		return evsel->metric_id;  	if (evsel__is_tool(evsel)) -		return perf_tool_event__to_str(evsel__tool_event(evsel)); +		return evsel__tool_pmu_event_name(evsel);  	return "unknown";  } @@ -862,7 +947,6 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o  {  	bool function = evsel__is_function_event(evsel);  	struct perf_event_attr *attr = &evsel->core.attr; -	const char *arch = perf_env__arch(evsel__env(evsel));  	evsel__set_sample_bit(evsel, CALLCHAIN); @@ -893,6 +977,8 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o  	if (param->record_mode == CALLCHAIN_DWARF) {  		if (!function) { +			const char *arch = perf_env__arch(evsel__env(evsel)); +  			evsel__set_sample_bit(evsel, REGS_USER);  			evsel__set_sample_bit(evsel, STACK_USER);  			if (opts->sample_user_regs && @@ -1150,7 +1236,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,  	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;  	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; -	attr->inherit	    = !opts->no_inherit; +	attr->inherit	    = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit;  	attr->write_backward = opts->overwrite ? 1 : 0;  	attr->read_format   = PERF_FORMAT_LOST; @@ -1172,7 +1258,15 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,  		 */  		if (leader->core.nr_members > 1) {  			attr->read_format |= PERF_FORMAT_GROUP; -			attr->inherit = 0; +		} + +		/* +		 * Inherit + SAMPLE_READ requires SAMPLE_TID in the read_format +		 */ +		if (attr->inherit) { +			evsel__set_sample_bit(evsel, TID); +			evsel->core.attr.read_format |= +				PERF_FORMAT_ID;  		}  	} @@ -1494,7 +1588,6 @@ void evsel__exit(struct evsel *evsel)  	zfree(&evsel->group_name);  	zfree(&evsel->name);  	zfree(&evsel->filter); -	zfree(&evsel->pmu_name);  	zfree(&evsel->group_pmu_name);  	zfree(&evsel->unit);  	zfree(&evsel->metric_id); @@ -1503,8 +1596,8 @@ void evsel__exit(struct evsel *evsel)  	evsel->per_pkg_mask = NULL;  	zfree(&evsel->metric_events);  	perf_evsel__object.fini(evsel); -	if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || -	    evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) +	if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || +	    evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME)  		xyarray__delete(evsel->start_times);  } @@ -1684,171 +1777,31 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)  	return evsel__process_group_data(leader, cpu_map_idx, thread, data);  } -static bool read_until_char(struct io *io, char e) -{ -	int c; - -	do { -		c = io__get_char(io); -		if (c == -1) -			return false; -	} while (c != e); -	return true; -} - -static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) -{ -	char buf[256]; -	struct io io; -	int i; - -	io__init(&io, fd, buf, sizeof(buf)); - -	/* Skip lines to relevant CPU. */ -	for (i = -1; i < cpu.cpu; i++) { -		if (!read_until_char(&io, '\n')) -			return -EINVAL; -	} -	/* Skip to "cpu". */ -	if (io__get_char(&io) != 'c') return -EINVAL; -	if (io__get_char(&io) != 'p') return -EINVAL; -	if (io__get_char(&io) != 'u') return -EINVAL; - -	/* Skip N of cpuN. */ -	if (!read_until_char(&io, ' ')) -		return -EINVAL; - -	i = 1; -	while (true) { -		if (io__get_dec(&io, val) != ' ') -			break; -		if (field == i) -			return 0; -		i++; -	} -	return -EINVAL; -} - -static int read_pid_stat_field(int fd, int field, __u64 *val) -{ -	char buf[256]; -	struct io io; -	int c, i; - -	io__init(&io, fd, buf, sizeof(buf)); -	if (io__get_dec(&io, val) != ' ') -		return -EINVAL; -	if (field == 1) -		return 0; - -	/* Skip comm. */ -	if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) -		return -EINVAL; -	if (field == 2) -		return -EINVAL; /* String can't be returned. */ - -	/* Skip state */ -	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) -		return -EINVAL; -	if (field == 3) -		return -EINVAL; /* String can't be returned. */ - -	/* Loop over numeric fields*/ -	if (io__get_char(&io) != ' ') -		return -EINVAL; - -	i = 4; -	while (true) { -		c = io__get_dec(&io, val); -		if (c == -1) -			return -EINVAL; -		if (c == -2) { -			/* Assume a -ve was read */ -			c = io__get_dec(&io, val); -			*val *= -1; -		} -		if (c != ' ') -			return -EINVAL; -		if (field == i) -			return 0; -		i++; -	} -	return -EINVAL; -} - -static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread) +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)  { -	__u64 *start_time, cur_time, delta_start; -	int fd, err = 0; -	struct perf_counts_values *count; -	bool adjust = false; -	count = perf_counts(evsel->counts, cpu_map_idx, thread); +	u32 e_type = evsel->core.attr.type; +	u64 e_config = evsel->core.attr.config; -	switch (evsel__tool_event(evsel)) { -	case PERF_TOOL_DURATION_TIME: -		/* -		 * Pretend duration_time is only on the first CPU and thread, or -		 * else aggregation will scale duration_time by the number of -		 * CPUs/threads. -		 */ -		start_time = &evsel->start_time; -		if (cpu_map_idx == 0 && thread == 0) -			cur_time = rdclock(); -		else -			cur_time = *start_time; -		break; -	case PERF_TOOL_USER_TIME: -	case PERF_TOOL_SYSTEM_TIME: { -		bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; - -		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); -		fd = FD(evsel, cpu_map_idx, thread); -		lseek(fd, SEEK_SET, 0); -		if (evsel->pid_stat) { -			/* The event exists solely on 1 CPU. */ -			if (cpu_map_idx == 0) -				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); -			else -				cur_time = 0; -		} else { -			/* The event is for all threads. */ -			if (thread == 0) { -				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, -									cpu_map_idx); - -				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); -			} else { -				cur_time = 0; -			} -		} -		adjust = true; -		break; +	if (e_type != type) { +		return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && +			evsel->alternate_hw_config == config;  	} -	case PERF_TOOL_NONE: -	case PERF_TOOL_MAX: -	default: -		err = -EINVAL; -	} -	if (err) -		return err; -	delta_start = cur_time - *start_time; -	if (adjust) { -		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK); +	if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && +	    perf_pmus__supports_extended_type()) +		e_config &= PERF_HW_EVENT_MASK; -		delta_start *= 1000000000 / ticks_per_sec; -	} -	count->val    = delta_start; -	count->ena    = count->run = delta_start; -	count->lost   = 0; -	return 0; +	return e_config == config;  }  int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)  {  	if (evsel__is_tool(evsel)) -		return evsel__read_tool(evsel, cpu_map_idx, thread); +		return evsel__tool_pmu_read(evsel, cpu_map_idx, thread); + +	if (evsel__is_hwmon(evsel)) +		return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);  	if (evsel__is_retire_lat(evsel))  		return evsel__read_retire_lat(evsel, cpu_map_idx, thread); @@ -2042,6 +1995,7 @@ static struct perf_thread_map *empty_thread_map;  static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,  		struct perf_thread_map *threads)  { +	int ret = 0;  	int nthreads = perf_thread_map__nr(threads);  	if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || @@ -2072,23 +2026,21 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,  	    perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)  		return -ENOMEM; -	if ((evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || -	     evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) && -	    !evsel->start_times) { -		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64)); -		if (!evsel->start_times) -			return -ENOMEM; -	} +	if (evsel__is_tool(evsel)) +		ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads);  	evsel->open_flags = PERF_FLAG_FD_CLOEXEC;  	if (evsel->cgrp)  		evsel->open_flags |= PERF_FLAG_PID_CGROUP; -	return 0; +	return ret;  }  static void evsel__disable_missing_features(struct evsel *evsel)  { +	if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && +	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) +		evsel->core.attr.inherit = 0;  	if (perf_missing_features.branch_counters)  		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;  	if (perf_missing_features.read_lost) @@ -2138,120 +2090,346 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,  	return err;  } -bool evsel__detect_missing_features(struct evsel *evsel) +static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags) +{ +	int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, +			 /*group_fd=*/-1, flags); +	close(fd); + +	if (fd < 0) { +		attr->exclude_kernel = 1; + +		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, +			     /*group_fd=*/-1, flags); +		close(fd); +	} + +	if (fd < 0) { +		attr->exclude_hv = 1; + +		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, +			     /*group_fd=*/-1, flags); +		close(fd); +	} + +	if (fd < 0) { +		attr->exclude_guest = 1; + +		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, +			     /*group_fd=*/-1, flags); +		close(fd); +	} + +	attr->exclude_kernel = 0; +	attr->exclude_guest = 0; +	attr->exclude_hv = 0; + +	return fd >= 0; +} + +static void evsel__detect_missing_pmu_features(struct evsel *evsel)  { +	struct perf_event_attr attr = { +		.type = evsel->core.attr.type, +		.config = evsel->core.attr.config, +		.disabled = 1, +	}; +	struct perf_pmu *pmu = evsel->pmu; +	int old_errno; + +	old_errno = errno; + +	if (pmu == NULL) +		pmu = evsel->pmu = evsel__find_pmu(evsel); + +	if (pmu == NULL || pmu->missing_features.checked) +		goto out; +  	/*  	 * Must probe features in the order they were added to the -	 * perf_event_attr interface. +	 * perf_event_attr interface.  These are kernel core limitation but +	 * specific to PMUs with branch stack.  So we can detect with the given +	 * hardware event and stop on the first one succeeded.  	 */ -	if (!perf_missing_features.branch_counters && -	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { -		perf_missing_features.branch_counters = true; -		pr_debug2("switching off branch counters support\n"); + +	/* Please add new feature detection here. */ + +	attr.exclude_guest = 1; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	pmu->missing_features.exclude_guest = true; +	pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name); + +found: +	pmu->missing_features.checked = true; +out: +	errno = old_errno; +} + +static void evsel__detect_missing_brstack_features(struct evsel *evsel) +{ +	static bool detection_done = false; +	struct perf_event_attr attr = { +		.type = evsel->core.attr.type, +		.config = evsel->core.attr.config, +		.disabled = 1, +		.sample_type = PERF_SAMPLE_BRANCH_STACK, +		.sample_period = 1000, +	}; +	int old_errno; + +	if (detection_done) +		return; + +	old_errno = errno; + +	/* +	 * Must probe features in the order they were added to the +	 * perf_event_attr interface.  These are PMU specific limitation +	 * so we can detect with the given hardware event and stop on the +	 * first one succeeded. +	 */ + +	/* Please add new feature detection here. */ + +	attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.branch_counters = true; +	pr_debug2("switching off branch counters support\n"); + +	attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.branch_hw_idx = true; +	pr_debug2("switching off branch HW index support\n"); + +	attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.lbr_flags = true; +	pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + +found: +	detection_done = true; +	errno = old_errno; +} + +static bool evsel__detect_missing_features(struct evsel *evsel) +{ +	static bool detection_done = false; +	struct perf_event_attr attr = { +		.type = PERF_TYPE_SOFTWARE, +		.config = PERF_COUNT_SW_TASK_CLOCK, +		.disabled = 1, +	}; +	int old_errno; + +	evsel__detect_missing_pmu_features(evsel); + +	if (evsel__has_br_stack(evsel)) +		evsel__detect_missing_brstack_features(evsel); + +	if (detection_done) +		goto check; + +	old_errno = errno; + +	/* +	 * Must probe features in the order they were added to the +	 * perf_event_attr interface.  These are kernel core limitation +	 * not PMU-specific so we can detect with a software event and +	 * stop on the first one succeeded. +	 */ + +	/* Please add new feature detection here. */ + +	attr.inherit = true; +	attr.sample_type = PERF_SAMPLE_READ; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.inherit_sample_read = true; +	pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n"); +	attr.inherit = false; +	attr.sample_type = 0; + +	attr.read_format = PERF_FORMAT_LOST; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.read_lost = true; +	pr_debug2("switching off PERF_FORMAT_LOST support\n"); +	attr.read_format = 0; + +	attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.weight_struct = true; +	pr_debug2("switching off weight struct support\n"); +	attr.sample_type = 0; + +	attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.code_page_size = true; +	pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n"); +	attr.sample_type = 0; + +	attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.data_page_size = true; +	pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n"); +	attr.sample_type = 0; + +	attr.cgroup = 1; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.cgroup = true; +	pr_debug2_peo("Kernel has no cgroup sampling support\n"); +	attr.cgroup = 0; + +	attr.aux_output = 1; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.aux_output = true; +	pr_debug2_peo("Kernel has no attr.aux_output support\n"); +	attr.aux_output = 0; + +	attr.bpf_event = 1; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.bpf = true; +	pr_debug2_peo("switching off bpf_event\n"); +	attr.bpf_event = 0; + +	attr.ksymbol = 1; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.ksymbol = true; +	pr_debug2_peo("switching off ksymbol\n"); +	attr.ksymbol = 0; + +	attr.write_backward = 1; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.write_backward = true; +	pr_debug2_peo("switching off write_backward\n"); +	attr.write_backward = 0; + +	attr.use_clockid = 1; +	attr.clockid = CLOCK_MONOTONIC; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.clockid = true; +	pr_debug2_peo("switching off clockid\n"); +	attr.use_clockid = 0; +	attr.clockid = 0; + +	if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC)) +		goto found; +	perf_missing_features.cloexec = true; +	pr_debug2_peo("switching off cloexec flag\n"); + +	attr.mmap2 = 1; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.mmap2 = true; +	pr_debug2_peo("switching off mmap2\n"); +	attr.mmap2 = 0; + +	/* set this unconditionally? */ +	perf_missing_features.sample_id_all = true; +	pr_debug2_peo("switching off sample_id_all\n"); + +	attr.inherit = 1; +	attr.read_format = PERF_FORMAT_GROUP; +	if (has_attr_feature(&attr, /*flags=*/0)) +		goto found; +	perf_missing_features.group_read = true; +	pr_debug2_peo("switching off group read\n"); +	attr.inherit = 0; +	attr.read_format = 0; + +found: +	detection_done = true; +	errno = old_errno; + +check: +	if (evsel->core.attr.inherit && +	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && +	    perf_missing_features.inherit_sample_read)  		return true; -	} else if (!perf_missing_features.read_lost && -	    (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { -		perf_missing_features.read_lost = true; -		pr_debug2("switching off PERF_FORMAT_LOST support\n"); + +	if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && +	    perf_missing_features.branch_counters)  		return true; -	} else if (!perf_missing_features.weight_struct && -	    (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { -		perf_missing_features.weight_struct = true; -		pr_debug2("switching off weight struct support\n"); + +	if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) && +	    perf_missing_features.read_lost)  		return true; -	} else if (!perf_missing_features.code_page_size && -	    (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { -		perf_missing_features.code_page_size = true; -		pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); -		return false; -	} else if (!perf_missing_features.data_page_size && -	    (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { -		perf_missing_features.data_page_size = true; -		pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); -		return false; -	} else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { -		perf_missing_features.cgroup = true; -		pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); -		return false; -	} else if (!perf_missing_features.branch_hw_idx && -	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { -		perf_missing_features.branch_hw_idx = true; -		pr_debug2("switching off branch HW index support\n"); + +	if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && +	    perf_missing_features.weight_struct)  		return true; -	} else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { -		perf_missing_features.aux_output = true; -		pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); -		return false; -	} else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { -		perf_missing_features.bpf = true; -		pr_debug2_peo("switching off bpf_event\n"); + +	if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC && +	    !perf_missing_features.clockid) { +		perf_missing_features.clockid_wrong = true;  		return true; -	} else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { -		perf_missing_features.ksymbol = true; -		pr_debug2_peo("switching off ksymbol\n"); +	} + +	if (evsel->core.attr.use_clockid && perf_missing_features.clockid)  		return true; -	} else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { -		perf_missing_features.write_backward = true; -		pr_debug2_peo("switching off write_backward\n"); -		return false; -	} else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { -		perf_missing_features.clockid_wrong = true; -		pr_debug2_peo("switching off clockid\n"); + +	if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) && +	    perf_missing_features.cloexec)  		return true; -	} else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { -		perf_missing_features.clockid = true; -		pr_debug2_peo("switching off use_clockid\n"); + +	if (evsel->core.attr.mmap2 && perf_missing_features.mmap2)  		return true; -	} else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) { -		perf_missing_features.cloexec = true; -		pr_debug2_peo("switching off cloexec flag\n"); + +	if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS | +						    PERF_SAMPLE_BRANCH_NO_CYCLES)) && +	    perf_missing_features.lbr_flags)  		return true; -	} else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { -		perf_missing_features.mmap2 = true; -		pr_debug2_peo("switching off mmap2\n"); + +	if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && +	    perf_missing_features.group_read)  		return true; -	} else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { -		if (evsel->pmu == NULL) -			evsel->pmu = evsel__find_pmu(evsel); -		if (evsel->pmu) -			evsel->pmu->missing_features.exclude_guest = true; -		else { -			/* we cannot find PMU, disable attrs now */ -			evsel->core.attr.exclude_host = false; -			evsel->core.attr.exclude_guest = false; -		} +	if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol) +		return true; -		if (evsel->exclude_GH) { -			pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); -			return false; -		} -		if (!perf_missing_features.exclude_guest) { -			perf_missing_features.exclude_guest = true; -			pr_debug2_peo("switching off exclude_guest, exclude_host\n"); -		} +	if (evsel->core.attr.bpf_event && perf_missing_features.bpf)  		return true; -	} else if (!perf_missing_features.sample_id_all) { -		perf_missing_features.sample_id_all = true; -		pr_debug2_peo("switching off sample_id_all\n"); + +	if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) && +	    perf_missing_features.branch_hw_idx)  		return true; -	} else if (!perf_missing_features.lbr_flags && -			(evsel->core.attr.branch_sample_type & -			 (PERF_SAMPLE_BRANCH_NO_CYCLES | -			  PERF_SAMPLE_BRANCH_NO_FLAGS))) { -		perf_missing_features.lbr_flags = true; -		pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + +	if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all)  		return true; -	} else if (!perf_missing_features.group_read && -		    evsel->core.attr.inherit && -		   (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && -		   evsel__is_group_leader(evsel)) { -		perf_missing_features.group_read = true; -		pr_debug2_peo("switching off group read\n"); + +	return false; +} + +static bool evsel__handle_error_quirks(struct evsel *evsel, int error) +{ +	/* +	 * AMD core PMU tries to forward events with precise_ip to IBS PMU +	 * implicitly.  But IBS PMU has more restrictions so it can fail with +	 * supported event attributes.  Let's forward it back to the core PMU +	 * by clearing precise_ip only if it's from precise_max (:P). +	 */ +	if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && +	    evsel->core.attr.precise_ip && evsel->precise_max) { +		evsel->core.attr.precise_ip = 0; +		pr_debug2_peo("removing precise_ip on AMD\n"); +		display_attr(&evsel->core.attr);  		return true; -	} else { -		return false;  	} + +	return false;  }  static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, @@ -2262,13 +2440,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,  	int pid = -1, err, old_errno;  	enum rlimit_action set_rlimit = NO_CHANGE; -	if (evsel__tool_event(evsel) == PERF_TOOL_DURATION_TIME) { -		if (evsel->core.attr.sample_period) /* no sampling */ -			return -EINVAL; -		evsel->start_time = rdclock(); -		return 0; -	} -  	if (evsel__is_retire_lat(evsel))  		return tpebs_start(evsel->evlist); @@ -2293,6 +2464,17 @@ fallback_missing_features:  	pr_debug3("Opening: %s\n", evsel__name(evsel));  	display_attr(&evsel->core.attr); +	if (evsel__is_tool(evsel)) { +		return evsel__tool_pmu_open(evsel, threads, +					    start_cpu_map_idx, +					    end_cpu_map_idx); +	} +	if (evsel__is_hwmon(evsel)) { +		return evsel__hwmon_pmu_open(evsel, threads, +					     start_cpu_map_idx, +					     end_cpu_map_idx); +	} +  	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {  		for (thread = 0; thread < nthreads; thread++) { @@ -2304,46 +2486,6 @@ retry_open:  			if (!evsel->cgrp && !evsel->core.system_wide)  				pid = perf_thread_map__pid(threads, thread); -			if (evsel__tool_event(evsel) == PERF_TOOL_USER_TIME || -			    evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME) { -				bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; -				__u64 *start_time = NULL; - -				if (evsel->core.attr.sample_period) { -					/* no sampling */ -					err = -EINVAL; -					goto out_close; -				} -				if (pid > -1) { -					char buf[64]; - -					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); -					fd = open(buf, O_RDONLY); -					evsel->pid_stat = true; -				} else { -					fd = open("/proc/stat", O_RDONLY); -				} -				FD(evsel, idx, thread) = fd; -				if (fd < 0) { -					err = -errno; -					goto out_close; -				} -				start_time = xyarray__entry(evsel->start_times, idx, thread); -				if (pid > -1) { -					err = read_pid_stat_field(fd, system ? 15 : 14, -								  start_time); -				} else { -					struct perf_cpu cpu; - -					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); -					err = read_stat_field(fd, cpu, system ? 3 : 1, -							      start_time); -				} -				if (err) -					goto out_close; -				continue; -			} -  			group_fd = get_group_fd(evsel, idx, thread);  			if (group_fd == -2) { @@ -2352,8 +2494,6 @@ retry_open:  				goto out_close;  			} -			test_attr__ready(); -  			/* Debug message used by test scripts */  			pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",  				pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); @@ -2374,7 +2514,7 @@ retry_open:  			bpf_counter__install_pe(evsel, idx, fd); -			if (unlikely(test_attr__enabled)) { +			if (unlikely(test_attr__enabled())) {  				test_attr__open(&evsel->core.attr, pid,  						perf_cpu_map__cpu(cpus, idx),  						fd, group_fd, evsel->open_flags); @@ -2415,9 +2555,6 @@ retry_open:  	return 0;  try_fallback: -	if (evsel__precise_ip_fallback(evsel)) -		goto retry_open; -  	if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),  					 idx, threads, thread, err)) {  		/* We just removed 1 thread, so lower the upper nthreads limit. */ @@ -2434,11 +2571,15 @@ try_fallback:  	if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))  		goto retry_open; -	if (err != -EINVAL || idx > 0 || thread > 0) -		goto out_close; +	if (err == -EOPNOTSUPP && evsel__precise_ip_fallback(evsel)) +		goto retry_open; -	if (evsel__detect_missing_features(evsel)) +	if (err == -EINVAL && evsel__detect_missing_features(evsel))  		goto fallback_missing_features; + +	if (evsel__handle_error_quirks(evsel, err)) +		goto retry_open; +  out_close:  	if (err)  		threads->err_thread = thread; @@ -3245,6 +3386,27 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,  		evsel->core.attr.exclude_hv     = 1;  		return true; +	} else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest && +		   !evsel->exclude_GH) { +		const char *name = evsel__name(evsel); +		char *new_name; +		const char *sep = ":"; + +		/* Is there already the separator in the name. */ +		if (strchr(name, '/') || +		    (strchr(name, ':') && !evsel->is_libpfm_event)) +			sep = ""; + +		if (asprintf(&new_name, "%s%sH", name, sep) < 0) +			return false; + +		free(evsel->name); +		evsel->name = new_name; +		/* Apple M1 requires exclude_guest */ +		scnprintf(msg, msgsize, "trying to fall back to excluding guest samples"); +		evsel->core.attr.exclude_guest = 1; + +		return true;  	}  	return false; @@ -3415,7 +3577,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,  	return scnprintf(msg, size,  	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" -	"/bin/dmesg | grep -i perf may provide additional information.\n", +	"\"dmesg | grep -i perf\" may provide additional information.\n",  			 err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));  }  | 
