1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2024 Dennis Kobert <dennis@kobert.dev>
//! Performance Events Module
//!
//! This module contains event types for hardware and software performance counters.
//! C header: [`include/linux/perf_event.h`](srctree/include/linux/perf_event.h)
use crate::bindings;
/// Top-level classification of a performance event.
///
/// Each variant selects a different `perf_event_attr.type_` value when the event is applied
/// through [`EventConfig::configure_attr`].
#[derive(Clone, Copy, Debug)]
pub enum EventType {
    /// A generalized hardware counter, e.g. CPU cycles, instructions or cache misses.
    Hardware(HardwareEvent),
    /// A counter maintained in software, e.g. context switches or page faults.
    Software(SoftwareEvent),
    /// A hardware cache counter (L1D, L1I, LLC, TLBs, ...).
    HwCache(HwCacheEvent),
    /// A raw, hardware-specific event; the value is used verbatim as the event configuration.
    Raw(u64),
}
/// Generalized hardware performance counters.
///
/// Every variant corresponds to one `PERF_COUNT_HW_*` identifier of the C `perf_hw_id` enum.
#[derive(Clone, Copy, Debug)]
pub enum HardwareEvent {
    /// Total CPU cycles.
    ///
    /// The count may be affected by CPU frequency scaling; see
    /// [`HardwareEvent::RefCpuCycles`] for a count that is not.
    CpuCycles,
    /// Instructions executed by the CPU.
    ///
    /// Dividing by [`HardwareEvent::CpuCycles`] yields instructions per cycle (IPC).
    Instructions,
    /// Cache accesses.
    ///
    /// Which cache levels are covered depends on the CPU; usually this is the last level cache.
    CacheReferences,
    /// Cache misses.
    ///
    /// Which cache levels are covered depends on the CPU; usually these are last level cache
    /// misses, i.e. accesses that have to be served from memory or from another CPU.
    CacheMisses,
    /// Branch instructions executed.
    ///
    /// Useful for observing changes in program control flow.
    BranchInstructions,
    /// Branch instructions that were mispredicted.
    ///
    /// An indicator of how well branch prediction is working.
    BranchMisses,
    /// Bus cycles, which can differ from the total CPU cycle count.
    BusCycles,
    /// Cycles during which the CPU front-end is stalled.
    ///
    /// Points at instruction fetch or decode bottlenecks.
    StalledCyclesFrontend,
    /// Cycles during which the CPU back-end is stalled.
    ///
    /// Points at execution bottlenecks such as resource conflicts.
    StalledCyclesBackend,
    /// Total CPU cycles counted at a constant rate, independent of CPU frequency changes.
    RefCpuCycles,
}
/// Performance counters maintained in software.
///
/// Every variant corresponds to one `PERF_COUNT_SW_*` identifier of the C `perf_sw_ids` enum.
#[derive(Clone, Copy, Debug)]
pub enum SoftwareEvent {
    /// The CPU clock, a high-resolution per-CPU timer.
    CpuClock,
    /// The task clock, a high-resolution timer specific to the monitored task.
    TaskClock,
    /// All page faults, minor and major.
    PageFaults,
    /// Context switches, voluntary and involuntary.
    ContextSwitches,
    /// Migrations of the monitored task to a different CPU.
    CpuMigrations,
    /// Minor page faults, which are handled without disk I/O.
    PageFaultsMin,
    /// Major page faults, which need the page to be loaded from disk.
    PageFaultsMaj,
    /// Alignment faults, raised by unaligned memory accesses that the hardware does not handle.
    AlignmentFaults,
    /// Emulation faults, raised when an instruction has to be emulated in software.
    EmulationFaults,
    /// A placeholder event that counts nothing.
    Dummy,
    /// Output generated by BPF programs.
    BpfOutput,
    /// Context switches to a task that belongs to a different cgroup.
    CGroupSwitches,
}
/// Hardware cache performance counters.
///
/// Each variant names the cache being observed and carries the operation ([`HwCacheOp`]) and
/// the outcome ([`HwCacheOpResult`]) that should be counted.
#[derive(Clone, Copy, Debug)]
pub enum HwCacheEvent {
    /// Level 1 data cache.
    L1D(HwCacheOp, HwCacheOpResult),
    /// Level 1 instruction cache.
    L1I(HwCacheOp, HwCacheOpResult),
    /// Last level cache.
    LLC(HwCacheOp, HwCacheOpResult),
    /// Data TLB.
    DTLB(HwCacheOp, HwCacheOpResult),
    /// Instruction TLB.
    ITLB(HwCacheOp, HwCacheOpResult),
    /// Branch prediction unit.
    BPU(HwCacheOp, HwCacheOpResult),
    /// NUMA node (local memory) accesses.
    Node(HwCacheOp, HwCacheOpResult),
}
/// The kind of cache operation a hardware cache counter observes.
#[derive(Clone, Copy, Debug)]
pub enum HwCacheOp {
    /// Read accesses.
    Read,
    /// Write accesses.
    Write,
    /// Prefetch accesses.
    Prefetch,
}
/// The outcome of a cache operation a hardware cache counter observes.
#[derive(Clone, Copy, Debug)]
pub enum HwCacheOpResult {
    /// Every access, whether it hits or misses.
    Access,
    /// Only accesses that miss.
    Miss,
}
/// Types that know how to describe themselves in a `perf_event_attr`.
pub trait EventConfig {
    /// Writes this event's selection into `attr`.
    ///
    /// The implementations in this module set `attr.type_` and `attr.config` and do not touch
    /// any other field of `attr`.
    fn configure_attr(&self, attr: &mut bindings::perf_event_attr);
}
impl EventConfig for EventType {
    /// Forwards to the wrapped event's implementation.
    ///
    /// A raw event is handled here directly: it selects `PERF_TYPE_RAW` and its value is stored
    /// unchanged in `attr.config`.
    fn configure_attr(&self, attr: &mut bindings::perf_event_attr) {
        match *self {
            Self::Hardware(event) => event.configure_attr(attr),
            Self::Software(event) => event.configure_attr(attr),
            Self::HwCache(event) => event.configure_attr(attr),
            Self::Raw(config) => {
                attr.type_ = bindings::perf_type_id_PERF_TYPE_RAW as u32;
                attr.config = config;
            }
        }
    }
}
impl EventConfig for HardwareEvent {
    /// Selects `PERF_TYPE_HARDWARE` and stores this event's numeric id in `attr.config`.
    fn configure_attr(&self, attr: &mut bindings::perf_event_attr) {
        let config = u64::from(*self);
        attr.type_ = bindings::perf_type_id_PERF_TYPE_HARDWARE as u32;
        attr.config = config;
    }
}
impl EventConfig for SoftwareEvent {
    /// Selects `PERF_TYPE_SOFTWARE` and stores this event's numeric id in `attr.config`.
    fn configure_attr(&self, attr: &mut bindings::perf_event_attr) {
        let config = u64::from(*self);
        attr.type_ = bindings::perf_type_id_PERF_TYPE_SOFTWARE as u32;
        attr.config = config;
    }
}
impl EventConfig for HwCacheEvent {
    /// Selects `PERF_TYPE_HW_CACHE` and stores this event's encoded value in `attr.config`.
    fn configure_attr(&self, attr: &mut bindings::perf_event_attr) {
        let config = u64::from(*self);
        attr.type_ = bindings::perf_type_id_PERF_TYPE_HW_CACHE as u32;
        attr.config = config;
    }
}
// Raw value conversions
impl From<HardwareEvent> for u64 {
fn from(event: HardwareEvent) -> Self {
use HardwareEvent::*;
match event {
CpuCycles => bindings::perf_hw_id_PERF_COUNT_HW_CPU_CYCLES as u64,
Instructions => bindings::perf_hw_id_PERF_COUNT_HW_INSTRUCTIONS as u64,
CacheReferences => bindings::perf_hw_id_PERF_COUNT_HW_CACHE_REFERENCES as u64,
CacheMisses => bindings::perf_hw_id_PERF_COUNT_HW_CACHE_MISSES as u64,
BranchInstructions => bindings::perf_hw_id_PERF_COUNT_HW_BRANCH_INSTRUCTIONS as u64,
BranchMisses => bindings::perf_hw_id_PERF_COUNT_HW_BRANCH_MISSES as u64,
BusCycles => bindings::perf_hw_id_PERF_COUNT_HW_BUS_CYCLES as u64,
StalledCyclesFrontend => {
bindings::perf_hw_id_PERF_COUNT_HW_STALLED_CYCLES_FRONTEND as u64
}
StalledCyclesBackend => {
bindings::perf_hw_id_PERF_COUNT_HW_STALLED_CYCLES_BACKEND as u64
}
RefCpuCycles => bindings::perf_hw_id_PERF_COUNT_HW_REF_CPU_CYCLES as u64,
}
}
}
impl From<SoftwareEvent> for u64 {
fn from(event: SoftwareEvent) -> Self {
use SoftwareEvent::*;
match event {
CpuClock => bindings::perf_sw_ids_PERF_COUNT_SW_CPU_CLOCK as u64,
TaskClock => bindings::perf_sw_ids_PERF_COUNT_SW_TASK_CLOCK as u64,
PageFaults => bindings::perf_sw_ids_PERF_COUNT_SW_PAGE_FAULTS as u64,
ContextSwitches => bindings::perf_sw_ids_PERF_COUNT_SW_CONTEXT_SWITCHES as u64,
CpuMigrations => bindings::perf_sw_ids_PERF_COUNT_SW_CPU_MIGRATIONS as u64,
PageFaultsMin => bindings::perf_sw_ids_PERF_COUNT_SW_PAGE_FAULTS_MIN as u64,
PageFaultsMaj => bindings::perf_sw_ids_PERF_COUNT_SW_PAGE_FAULTS_MAJ as u64,
AlignmentFaults => bindings::perf_sw_ids_PERF_COUNT_SW_ALIGNMENT_FAULTS as u64,
EmulationFaults => bindings::perf_sw_ids_PERF_COUNT_SW_EMULATION_FAULTS as u64,
Dummy => bindings::perf_sw_ids_PERF_COUNT_SW_DUMMY as u64,
BpfOutput => bindings::perf_sw_ids_PERF_COUNT_SW_BPF_OUTPUT as u64,
CGroupSwitches => bindings::perf_sw_ids_PERF_COUNT_SW_CGROUP_SWITCHES as u64,
}
}
}
impl From<HwCacheEvent> for u64 {
fn from(event: HwCacheEvent) -> Self {
use HwCacheEvent::*;
let (cache_id, op, result) = match event {
L1D(op, result) => (
bindings::perf_hw_cache_id_PERF_COUNT_HW_CACHE_L1D,
op,
result,
),
L1I(op, result) => (
bindings::perf_hw_cache_id_PERF_COUNT_HW_CACHE_L1I,
op,
result,
),
LLC(op, result) => (
bindings::perf_hw_cache_id_PERF_COUNT_HW_CACHE_LL,
op,
result,
),
DTLB(op, result) => (
bindings::perf_hw_cache_id_PERF_COUNT_HW_CACHE_DTLB,
op,
result,
),
ITLB(op, result) => (
bindings::perf_hw_cache_id_PERF_COUNT_HW_CACHE_ITLB,
op,
result,
),
BPU(op, result) => (
bindings::perf_hw_cache_id_PERF_COUNT_HW_CACHE_BPU,
op,
result,
),
Node(op, result) => (
bindings::perf_hw_cache_id_PERF_COUNT_HW_CACHE_NODE,
op,
result,
),
};
let op_id: u32 = match op {
HwCacheOp::Read => bindings::perf_hw_cache_op_id_PERF_COUNT_HW_CACHE_OP_READ,
HwCacheOp::Write => bindings::perf_hw_cache_op_id_PERF_COUNT_HW_CACHE_OP_WRITE,
HwCacheOp::Prefetch => bindings::perf_hw_cache_op_id_PERF_COUNT_HW_CACHE_OP_PREFETCH,
};
let result_id: u32 = match result {
HwCacheOpResult::Access => {
bindings::perf_hw_cache_op_result_id_PERF_COUNT_HW_CACHE_RESULT_ACCESS
}
HwCacheOpResult::Miss => {
bindings::perf_hw_cache_op_result_id_PERF_COUNT_HW_CACHE_RESULT_MISS
}
};
// Encode the cache event ID according to the kernel's encoding scheme:
// - cache_id << 0
// - op_id << 8
// - result_id << 16
(cache_id as u64) | ((op_id as u64) << 8) | ((result_id as u64) << 16)
}
}
// Implement From for EventType
impl From<HardwareEvent> for EventType {
fn from(event: HardwareEvent) -> Self {
EventType::Hardware(event)
}
}
impl From<SoftwareEvent> for EventType {
fn from(event: SoftwareEvent) -> Self {
EventType::Software(event)
}
}
impl From<HwCacheEvent> for EventType {
fn from(event: HwCacheEvent) -> Self {
EventType::HwCache(event)
}
}
|