author     Linus Torvalds <torvalds@linux-foundation.org>  2022-10-10 09:27:46 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2022-10-10 09:27:46 -0700
commit     3871d93b82a4a6c1f4308064f046a544f16ada21
tree       99ef06b536798100cd7abb4753263505cc465215 /include
parent     30c999937f69abf935b0228b8411713737377d9e
parent     82aad7ff7ac25c8cf09d491ae23b9823f1901486
Merge tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:
"PMU driver updates:
- Add AMD Last Branch Record Extension Version 2 (LbrExtV2) feature
support for Zen 4 processors.
- Extend the perf ABI to provide branch speculation information, if
available, and use this on CPUs that have it (eg. LbrExtV2).
- Improve Intel PEBS TSC timestamp handling & integration.
- Add Intel Raptor Lake S CPU support.
- Add 'perf mem' and 'perf c2c' memory profiling support on AMD CPUs
by utilizing IBS tagged load/store samples.
- Clean up & optimize various x86 PMU details.
HW breakpoints:
- Big rework to optimize the code for systems with hundreds of CPUs
and thousands of breakpoints:
- Replace the nr_bp_mutex global mutex with the bp_cpuinfo_sem
per-CPU rwsem that is read-locked during most of the key
operations.
- Improve the O(#cpus * #tasks) logic in toggle_bp_slot() and
fetch_bp_busy_slots().
- Apply micro-optimizations & cleanups.
- Misc cleanups & enhancements"
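As context for the bp_cpuinfo_sem item above: the point of a percpu_rw_semaphore here is that the frequent operations take only the cheap, per-CPU read side, while a rare global pass takes the exclusive write side. A minimal sketch of that pattern follows, using only the generic percpu-rwsem API; the toy_* names are hypothetical and are not the functions this series adds.

/*
 * Sketch of the per-CPU rwsem pattern (hypothetical toy_* names,
 * not the kernel's actual hw_breakpoint code).
 */
#include <linux/percpu-rwsem.h>

static DEFINE_STATIC_PERCPU_RWSEM(toy_bp_cpuinfo_sem);

/* Hot path: runs on every breakpoint install; readers scale with CPUs. */
static int toy_install_breakpoint(void)
{
	percpu_down_read(&toy_bp_cpuinfo_sem);
	/* ... reserve a per-CPU/per-task slot here ... */
	percpu_up_read(&toy_bp_cpuinfo_sem);
	return 0;
}

/* Cold path: a rare global rebalance that must exclude all readers. */
static void toy_rebalance_all_slots(void)
{
	percpu_down_write(&toy_bp_cpuinfo_sem);
	/* ... walk every CPU's constraint table ... */
	percpu_up_write(&toy_bp_cpuinfo_sem);
}

The design trade-off is the usual one for percpu rwsems: read acquisition avoids bouncing a shared cache line between CPUs, at the cost of a much more expensive write side, which fits a workload of thousands of breakpoint installs and only occasional global rebalancing.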
* tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits)
perf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex
perf: Fix pmu_filter_match()
perf: Fix lockdep_assert_event_ctx()
perf/x86/amd/lbr: Adjust LBR regardless of filtering
perf/x86/utils: Fix uninitialized var in get_branch_type()
perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file
perf/x86/amd: Support PERF_SAMPLE_PHY_ADDR
perf/x86/amd: Support PERF_SAMPLE_ADDR
perf/x86/amd: Support PERF_SAMPLE_{WEIGHT|WEIGHT_STRUCT}
perf/x86/amd: Support PERF_SAMPLE_DATA_SRC
perf/x86/amd: Add IBS OP_DATA2 DataSrc bit definitions
perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO}
perf/x86/uncore: Add new Raptor Lake S support
perf/x86/cstate: Add new Raptor Lake S support
perf/x86/msr: Add new Raptor Lake S support
perf/x86: Add new Raptor Lake S support
bpf: Check flags for branch stack in bpf_read_branch_records helper
perf, hw_breakpoint: Fix use-after-free if perf_event_open() fails
perf: Use sample_flags for raw_data
perf: Use sample_flags for addr
...
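To illustrate the extended branch-sampling ABI that several commits above refer to (PERF_SAMPLE_BRANCH_PRIV_SAVE, saved branch type and speculation payloads), here is a hedged user-space sketch of opening such an event. It uses only uapi symbols visible in the diff below plus the long-standing perf_event_open() calling convention; kernels or PMUs without the feature will reject or ignore the new bits, so treat this as a sketch, not a portable tool.

/* Hedged sketch: request branch records with type and privilege info. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* Any branch, plus the new type/privilege payloads from this merge. */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				  PERF_SAMPLE_BRANCH_TYPE_SAVE |
				  PERF_SAMPLE_BRANCH_PRIV_SAVE;

	/* Profile the calling thread on any CPU. */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open"); /* e.g. EOPNOTSUPP on older PMUs */
		return 1;
	}
	/* ... mmap the ring buffer and consume samples here ... */
	close(fd);
	return 0;
}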
Diffstat (limited to 'include')
-rw-r--r--  include/linux/hw_breakpoint.h    |  4
-rw-r--r--  include/linux/percpu-rwsem.h     |  6
-rw-r--r--  include/linux/perf/arm_pmu.h     |  9
-rw-r--r--  include/linux/perf_event.h       | 77
-rw-r--r--  include/uapi/linux/perf_event.h  | 57
5 files changed, 129 insertions, 24 deletions
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 78dd7035d1e5..f319bd26b030 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -74,12 +74,12 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern void unregister_hw_breakpoint(struct perf_event *bp);
 extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events);
+extern bool hw_breakpoint_is_used(void);
 
 extern int dbg_reserve_bp_slot(struct perf_event *bp);
 extern int dbg_release_bp_slot(struct perf_event *bp);
 extern int reserve_bp_slot(struct perf_event *bp);
 extern void release_bp_slot(struct perf_event *bp);
-int hw_breakpoint_weight(struct perf_event *bp);
 int arch_reserve_bp_slot(struct perf_event *bp);
 void arch_release_bp_slot(struct perf_event *bp);
 void arch_unregister_hw_breakpoint(struct perf_event *bp);
@@ -121,6 +121,8 @@ register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
 static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
 static inline void
 unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { }
+static inline bool hw_breakpoint_is_used(void) { return false; }
+
 static inline int
 reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
 static inline void release_bp_slot(struct perf_event *bp) { }
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 5fda40f97fe9..36b942b67b7d 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -121,9 +121,15 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 	preempt_enable();
 }
 
+extern bool percpu_is_read_locked(struct percpu_rw_semaphore *);
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
+static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem)
+{
+	return atomic_read(&sem->block);
+}
+
 extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
 				const char *, struct lock_class_key *);
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 0407a38b470a..0356cb6a215d 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -24,10 +24,11 @@
 /*
  * ARM PMU hw_event flags
  */
-/* Event uses a 64bit counter */
-#define ARMPMU_EVT_64BIT		1
-/* Event uses a 47bit counter */
-#define ARMPMU_EVT_47BIT		2
+#define ARMPMU_EVT_64BIT		0x00001 /* Event uses a 64bit counter */
+#define ARMPMU_EVT_47BIT		0x00002 /* Event uses a 47bit counter */
+
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT);
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT);
 
 #define HW_OP_UNSUPPORTED		0xFFFF
 #define C(_x)				PERF_COUNT_HW_CACHE_##_x
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ee8b9ecdc03b..853f64b6c8c2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -36,6 +36,7 @@ struct perf_guest_info_callbacks {
 };
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <linux/rhashtable-types.h>
 #include <asm/hw_breakpoint.h>
 #endif
 
@@ -60,6 +61,7 @@ struct perf_guest_info_callbacks {
 #include <linux/refcount.h>
 #include <linux/security.h>
 #include <linux/static_call.h>
+#include <linux/lockdep.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -137,9 +139,11 @@ struct hw_perf_event_extra {
  * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
  * usage.
  */
-#define PERF_EVENT_FLAG_ARCH			0x0000ffff
+#define PERF_EVENT_FLAG_ARCH			0x000fffff
 #define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000
 
+static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
+
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -178,7 +182,7 @@ struct hw_perf_event {
 		 * creation and event initalization.
 		 */
 		struct arch_hw_breakpoint	info;
-		struct list_head		bp_list;
+		struct rhlist_head		bp_list;
 	};
 #endif
 	struct { /* amd_iommu */
@@ -631,7 +635,23 @@ struct pmu_event_list {
 	struct list_head	list;
 };
 
+/*
+ * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex
+ * as such iteration must hold either lock. However, since ctx->lock is an IRQ
+ * safe lock, and is only held by the CPU doing the modification, having IRQs
+ * disabled is sufficient since it will hold-off the IPIs.
+ */
+#ifdef CONFIG_PROVE_LOCKING
+#define lockdep_assert_event_ctx(event)				\
+	WARN_ON_ONCE(__lockdep_enabled &&			\
+		     (this_cpu_read(hardirqs_enabled) &&	\
+		      lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
+#else
+#define lockdep_assert_event_ctx(event)
+#endif
+
 #define for_each_sibling_event(sibling, event)			\
+	lockdep_assert_event_ctx(event);			\
 	if ((event)->group_leader == (event))			\
 		list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
 
@@ -1007,18 +1027,20 @@ struct perf_sample_data {
 	 * Fields set by perf_sample_data_init(), group so as to
 	 * minimize the cachelines touched.
 	 */
-	u64				addr;
-	struct perf_raw_record		*raw;
-	struct perf_branch_stack	*br_stack;
+	u64				sample_flags;
 	u64				period;
-	union perf_sample_weight	weight;
-	u64				txn;
-	union perf_mem_data_src		data_src;
 
 	/*
 	 * The other fields, optionally {set,used} by
 	 * perf_{prepare,output}_sample().
 	 */
+	struct perf_branch_stack	*br_stack;
+	union perf_sample_weight	weight;
+	union perf_mem_data_src		data_src;
+	u64				txn;
+	u64				addr;
+	struct perf_raw_record		*raw;
+
 	u64				type;
 	u64				ip;
 	struct {
@@ -1056,13 +1078,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 					 u64 addr, u64 period)
 {
 	/* remaining struct members initialized in perf_prepare_sample() */
-	data->addr = addr;
-	data->raw  = NULL;
-	data->br_stack = NULL;
+	data->sample_flags = PERF_SAMPLE_PERIOD;
 	data->period = period;
-	data->weight.full = 0;
-	data->data_src.val = PERF_MEM_NA;
-	data->txn = 0;
+
+	if (addr) {
+		data->addr = addr;
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
 }
 
 /*
@@ -1078,6 +1100,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
 	br->abort = 0;
 	br->cycles = 0;
 	br->type = 0;
+	br->spec = PERF_BR_SPEC_NA;
 	br->reserved = 0;
 }
 
@@ -1684,4 +1707,30 @@ static inline void perf_lopwr_cb(bool mode)
 }
 #endif
 
+#ifdef CONFIG_PERF_EVENTS
+static inline bool branch_sample_no_flags(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
+}
+
+static inline bool branch_sample_no_cycles(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
+}
+
+static inline bool branch_sample_type(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
+}
+
+static inline bool branch_sample_hw_index(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
+static inline bool branch_sample_priv(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
+}
+#endif /* CONFIG_PERF_EVENTS */
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 03b370062741..85be78e0e7f6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -164,8 +164,6 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_WEIGHT_STRUCT		= 1U << 24,
 
 	PERF_SAMPLE_MAX = 1U << 25,		/* non-ABI */
-
-	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
 
 #define PERF_SAMPLE_WEIGHT_TYPE	(PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
@@ -204,6 +202,8 @@ enum perf_branch_sample_type_shift {
 
 	PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT	= 17, /* save low level index of raw branch records */
 
+	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -233,6 +233,8 @@ enum perf_branch_sample_type {
 
 	PERF_SAMPLE_BRANCH_HW_INDEX	= 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
 
+	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
@@ -253,9 +255,48 @@ enum {
 	PERF_BR_COND_RET	= 10,	/* conditional function return */
 	PERF_BR_ERET		= 11,	/* exception return */
 	PERF_BR_IRQ		= 12,	/* irq */
+	PERF_BR_SERROR		= 13,	/* system error */
+	PERF_BR_NO_TX		= 14,	/* not in transaction */
+	PERF_BR_EXTEND_ABI	= 15,	/* extend ABI */
 	PERF_BR_MAX,
 };
 
+/*
+ * Common branch speculation outcome classification
+ */
+enum {
+	PERF_BR_SPEC_NA			= 0,	/* Not available */
+	PERF_BR_SPEC_WRONG_PATH		= 1,	/* Speculative but on wrong path */
+	PERF_BR_NON_SPEC_CORRECT_PATH	= 2,	/* Non-speculative but on correct path */
+	PERF_BR_SPEC_CORRECT_PATH	= 3,	/* Speculative and on correct path */
+	PERF_BR_SPEC_MAX,
+};
+
+enum {
+	PERF_BR_NEW_FAULT_ALGN		= 0,	/* Alignment fault */
+	PERF_BR_NEW_FAULT_DATA		= 1,	/* Data fault */
+	PERF_BR_NEW_FAULT_INST		= 2,	/* Inst fault */
+	PERF_BR_NEW_ARCH_1		= 3,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_2		= 4,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_3		= 5,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_4		= 6,	/* Architecture specific */
+	PERF_BR_NEW_ARCH_5		= 7,	/* Architecture specific */
+	PERF_BR_NEW_MAX,
+};
+
+enum {
+	PERF_BR_PRIV_UNKNOWN	= 0,
+	PERF_BR_PRIV_USER	= 1,
+	PERF_BR_PRIV_KERNEL	= 2,
+	PERF_BR_PRIV_HV		= 3,
+};
+
+#define PERF_BR_ARM64_FIQ		PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT	PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT	PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST	PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA	PERF_BR_NEW_ARCH_5
+
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
 	(PERF_SAMPLE_BRANCH_USER|\
 	 PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1295,7 +1336,9 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0xa available */
+/* 5-0x8 available */
+#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */
+#define PERF_MEM_LVLNUM_IO	0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
 #define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
@@ -1313,7 +1356,7 @@ union perf_mem_data_src {
 #define PERF_MEM_SNOOP_SHIFT	19
 
 #define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
-/* 1 free */
+#define PERF_MEM_SNOOPX_PEER	0x02 /* xfer from peer */
 #define PERF_MEM_SNOOPX_SHIFT	38
 
 /* locked instruction */
@@ -1363,6 +1406,7 @@ union perf_mem_data_src {
  *     abort: aborting a hardware transaction
  *    cycles: cycles from last branch (or 0 if not supported)
  *      type: branch type
+ *      spec: branch speculation info (or 0 if not supported)
  */
 struct perf_branch_entry {
 	__u64	from;
@@ -1373,7 +1417,10 @@ struct perf_branch_entry {
 		abort:1,     /* transaction abort */
 		cycles:16,   /* cycle count to last branch */
 		type:4,      /* branch type */
-		reserved:40;
+		spec:2,      /* branch speculation info */
+		new_type:4,  /* additional branch type */
+		priv:3,      /* privilege level */
+		reserved:31;
 };
 
 union perf_sample_weight {
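Finally, a hedged sketch of consuming the new perf_branch_entry bitfields (spec, new_type, priv) once branch-stack samples have been parsed out of the mmap'ed ring buffer. dump_branches() and its arguments are hypothetical helpers; the enum values and field names come straight from the uapi hunk above.

/* Hedged sketch: decode the extended branch fields from parsed samples. */
#include <linux/perf_event.h>
#include <stdio.h>

static const char *spec_str(unsigned int spec)
{
	switch (spec) {
	case PERF_BR_SPEC_WRONG_PATH:		return "spec-wrong";
	case PERF_BR_NON_SPEC_CORRECT_PATH:	return "nonspec-correct";
	case PERF_BR_SPEC_CORRECT_PATH:		return "spec-correct";
	default:				return "n/a"; /* PERF_BR_SPEC_NA */
	}
}

/* `entries`/`nr` assumed to come from a PERF_SAMPLE_BRANCH_STACK record. */
static void dump_branches(const struct perf_branch_entry *entries, __u64 nr)
{
	for (__u64 i = 0; i < nr; i++) {
		const struct perf_branch_entry *br = &entries[i];

		printf("%#llx -> %#llx type=%u spec=%s priv=%u\n",
		       (unsigned long long)br->from,
		       (unsigned long long)br->to,
		       (unsigned int)br->type,
		       spec_str(br->spec),
		       (unsigned int)br->priv);
	}
}

Note how the old 40-bit reserved field was carved up (2 + 4 + 3 + 31 bits) so existing consumers that ignored reserved bits keep working; PERF_BR_SPEC_NA and PERF_BR_PRIV_UNKNOWN are the zero values, so PMUs that never set the new fields report "not available" for free.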