9 files changed, 969 insertions, 4 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3abd369..973834c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -337,6 +337,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 
 [[package]]
+name = "csv"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
+dependencies = [
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
 name = "ctrlc"
 version = "3.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -698,6 +719,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "clap",
+ "csv",
  "ctrlc",
  "dashmap",
  "iocuddle",
diff --git a/Cargo.toml b/Cargo.toml
index 7691cf3..19d8385 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,7 @@ clap = { version = "4.5" , features = ["derive"] }
 perf-event = { path = "./perf-event" }
 procfs = { version = "0.17.0", default-features = false }
 dashmap = "6.1.0"
+csv = "1.3.1"
 
 [build-dependencies]
 scx_rustland_core = "2.2.8"
diff --git a/flake.nix b/flake.nix
index 863c0ea..c97c033 100644
--- a/flake.nix
+++ b/flake.nix
@@ -29,12 +29,22 @@
           extensions = [ "rust-src" "rust-analyzer" "clippy" "rustfmt" ];
         };
 
+        # Python with ML dependencies
+        pythonEnv = pkgs.python3.withPackages (ps: with ps; [
+          pandas
+          numpy
+          scikit-learn
+          matplotlib
+          pytorch
+        ]);
+
         # Dependencies based on scx_rustscheds
         nativeBuildInputs = with pkgs; [
           pkg-config
           rustVersion
           llvmPackages.clang
           bpftools # bpftool
+          pythonEnv # Add Python environment
         ];
         
         buildInputs = with pkgs; [
@@ -84,6 +94,7 @@
             echo "Kernel: $(uname -r)"
             echo "Rust: $(rustc --version)"
             echo "Clang: $(clang --version | head -n1)"
+            echo "Python: $(python --version)"
           '';
         };
       });
diff --git a/perf-event/src/lib.rs b/perf-event/src/lib.rs
index 03ef643..d1478fe 100644
--- a/perf-event/src/lib.rs
+++ b/perf-event/src/lib.rs
@@ -453,6 +453,8 @@ impl<'a> Default for Builder<'a> {
         attrs.set_disabled(1);
         attrs.set_exclude_kernel(1); // don't count time in kernel
         attrs.set_exclude_hv(1); // don't count time in hypervisor
+        attrs.set_exclude_kernel(0); // also count time in kernel (overrides the setting above)
+        attrs.set_exclude_hv(0); // also count time in hypervisor (overrides the setting above)
 
         // Request data for `time_enabled` and `time_running`.
         attrs.read_format |= sys::bindings::PERF_FORMAT_TOTAL_TIME_ENABLED as u64
@@ -760,6 +762,11 @@ impl Group {
     /// Construct a new, empty `Group`.
     #[allow(unused_parens)]
     pub fn new() -> io::Result<Group> {
+        Self::new_with_pid_and_cpu(0, -1)
+    }
+
+    /// Construct a new, empty `Group` with cpu and pid set.
+    pub fn new_with_pid_and_cpu(pid: i32, cpu: i32) -> io::Result<Group> {
         // Open a placeholder perf counter that we can add other events to.
         let mut attrs = perf_event_attr {
             size: std::mem::size_of::<perf_event_attr>() as u32,
@@ -780,7 +787,7 @@ impl Group {
 
         let file = unsafe {
             File::from_raw_fd(check_errno_syscall(|| {
-                sys::perf_event_open(&mut attrs, 0, -1, -1, 0)
+                sys::perf_event_open(&mut attrs, pid, cpu, -1, 0)
             })?)
         };
 
diff --git a/power_predictor.py b/power_predictor.py
new file mode 100644
index 0000000..67c73ae
--- /dev/null
+++ b/power_predictor.py
@@ -0,0 +1,235 @@
+import pandas as pd
+import numpy as np
+from sklearn.impute import KNNImputer
+from sklearn.preprocessing import StandardScaler
+from sklearn.feature_selection import mutual_info_regression
+from sklearn.model_selection import train_test_split
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+import matplotlib.pyplot as plt
+
+# Step 1: Load and prepare the data
+def load_data(csv_path):
+    """Load the benchmark CSV; return (X, y). Raises KeyError if an expected column is absent."""
+    df = pd.read_csv(csv_path)
+    
+    # Extract target (power usage)
+    y = df['package_power_j'].values
+    
+    # Features are every column except the bookkeeping columns and the target itself.
+    # Selecting by name rather than position keeps the target out of X if columns move.
+    X = df.drop(columns=['timestamp', 'duration_ms', 'package_power_j'])
+    
+    # Print information about the dataset
+    print(f"Loaded dataset with {X.shape[0]} samples and {X.shape[1]} features")
+    print(f"Feature names: {X.columns.tolist()}")
+    
+    return X, y
+
+# Step 2: Handle missing values using KNN imputation
+def impute_missing_values(X):
+    """Return X as a float DataFrame with NaNs filled by 5-nearest-neighbour imputation."""
+    # Empty strings stand for missing values: turn them into NaN and make every column numeric
+    X = X.replace('', np.nan).astype(float)
+    
+    # KNNImputer drops columns that have no observed value at all, which would make the
+    # imputed array narrower than X.columns below; remove such columns explicitly first
+    X = X.dropna(axis=1, how='all')
+    
+    # Count missing values per column
+    missing_counts = X.isna().sum()
+    print("Missing values per column:")
+    for col, count in missing_counts.items():
+        if count > 0:
+            print(f"  {col}: {count} ({count/len(X)*100:.1f}%)")
+    
+    # Impute missing values using KNN, keeping the original row index and column labels
+    X_imputed = KNNImputer(n_neighbors=5).fit_transform(X)
+    return pd.DataFrame(X_imputed, columns=X.columns, index=X.index)
+
+# Step 3: Feature importance analysis
+def analyze_feature_importance(X, y):
+    # Calculate mutual information scores
+    mi_scores = mutual_info_regression(X, y)
+    
+    # Create a DataFrame of features and their importance scores
+    feature_importance = pd.DataFrame({
+        'Feature': X.columns,
+        'Importance': mi_scores
+    })
+    
+    # Sort by importance
+    feature_importance = feature_importance.sort_values('Importance', ascending=False)
+    
+    # Plot feature importance
+    plt.figure(figsize=(10, 6))
+    plt.barh(feature_importance['Feature'][:10], feature_importance['Importance'][:10])
+    plt.xlabel('Mutual Information Score')
+    plt.title('Top 10 Most Important Features')
+    plt.tight_layout()
+    plt.savefig('feature_importance.png')
+    
+    print("\nTop 5 most important features:")
+    for i, (_, row) in enumerate(feature_importance.head(5).iterrows(), 1):
+        print(f"{i}. {row['Feature']} - importance: {row['Importance']:.4f}")
+    
+    return feature_importance
+
+# Step 4: Define the neural network model in PyTorch
+class PowerEstimator(nn.Module):
+    def __init__(self, input_size):
+        super(PowerEstimator, self).__init__()
+        self.model = nn.Sequential(
+            nn.Linear(input_size, 64),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(64, 32),
+            nn.ReLU(),
+            nn.Linear(32, 1)
+        )
+    
+    def forward(self, x):
+        return self.model(x)
+
+# Step 5: Train the model
+def train_model(X, y, batch_size=32, epochs=100, lr=0.001):
+    # Split data into training and validation sets
+    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
+    
+    # Scale the features
+    scaler = StandardScaler()
+    X_train_scaled = scaler.fit_transform(X_train)
+    X_val_scaled = scaler.transform(X_val)
+    
+    # Convert to PyTorch tensors
+    X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
+    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
+    X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
+    y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)
+    
+    # Create data loaders
+    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size)
+    
+    # Initialize model, loss function, and optimizer
+    model = PowerEstimator(X_train.shape[1])
+    criterion = nn.MSELoss()
+    optimizer = optim.Adam(model.parameters(), lr=lr)
+    
+    # Training loop
+    train_losses = []
+    val_losses = []
+    
+    for epoch in range(epochs):
+        # Training
+        model.train()
+        train_loss = 0.0
+        for inputs, targets in train_loader:
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = criterion(outputs, targets)
+            loss.backward()
+            optimizer.step()
+            train_loss += loss.item() * inputs.size(0)
+        
+        train_loss /= len(train_loader.dataset)
+        train_losses.append(train_loss)
+        
+        # Validation
+        model.eval()
+        val_loss = 0.0
+        with torch.no_grad():
+            for inputs, targets in val_loader:
+                outputs = model(inputs)
+                loss = criterion(outputs, targets)
+                val_loss += loss.item() * inputs.size(0)
+            
+            val_loss /= len(val_loader.dataset)
+            val_losses.append(val_loss)
+        
+        # Print progress
+        if (epoch + 1) % 10 == 0:
+            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")
+    
+    # Plot training history
+    plt.figure(figsize=(10, 5))
+    plt.plot(train_losses, label='Training Loss')
+    plt.plot(val_losses, label='Validation Loss')
+    plt.xlabel('Epoch')
+    plt.ylabel('MSE Loss')
+    plt.title('Training and Validation Loss')
+    plt.legend()
+    plt.savefig('training_history.png')
+    
+    return model, scaler
+
+# Step 6: Evaluate the model
+def evaluate_model(model, X, y, scaler):
+    # Scale the features
+    X_scaled = scaler.transform(X)
+    
+    # Convert to PyTorch tensors
+    X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
+    y_tensor = torch.tensor(y, dtype=torch.float32)
+    
+    # Make predictions
+    model.eval()
+    with torch.no_grad():
+        y_pred = model(X_tensor).squeeze().numpy()
+    
+    # Calculate metrics
+    mse = np.mean((y_pred - y) ** 2)
+    rmse = np.sqrt(mse)
+    mae = np.mean(np.abs(y_pred - y))
+    r2 = 1 - (np.sum((y - y_pred) ** 2) / np.sum((y - np.mean(y)) ** 2))
+    
+    print("\nModel Evaluation:")
+    print(f"MSE: {mse:.6f}")
+    print(f"RMSE: {rmse:.6f}")
+    print(f"MAE: {mae:.6f}")
+    print(f"R²: {r2:.6f}")
+    
+    # Plot actual vs predicted values
+    plt.figure(figsize=(8, 8))
+    plt.scatter(y, y_pred, alpha=0.5)
+    plt.plot([min(y), max(y)], [min(y), max(y)], 'r--')
+    plt.xlabel('Actual Power Usage (J)')
+    plt.ylabel('Predicted Power Usage (J)')
+    plt.title('Actual vs Predicted Power Usage')
+    plt.tight_layout()
+    plt.savefig('prediction_scatter.png')
+    
+    return mse, rmse, mae, r2
+
+# Step 7: Main function
+def main():
+    csv_path = 'logs.csv'
+    
+    # Load and prepare data
+    print("Loading data...")
+    X, y = load_data(csv_path)
+    
+    # Impute missing values
+    print("\nImputing missing values...")
+    X_imputed = impute_missing_values(X)
+    
+    # Analyze feature importance
+    print("\nAnalyzing feature importance...")
+    feature_importance = analyze_feature_importance(X_imputed, y)
+    
+    # Train the model
+    print("\nTraining model...")
+    model, scaler = train_model(X_imputed, y, batch_size=8, epochs=200)
+    
+    # Evaluate the model
+    print("\nEvaluating model...")
+    evaluate_model(model, X_imputed, y, scaler)
+    
+    print("\nDone!")
+
+if __name__ == "__main__":
+    main()
diff --git a/src/benchmark.rs b/src/benchmark.rs
new file mode 100644
index 0000000..606f29b
--- /dev/null
+++ b/src/benchmark.rs
@@ -0,0 +1,664 @@
+use crate::bpf::*;
+use crate::energy::rapl;
+use anyhow::Result;
+use csv::Writer;
+use libbpf_rs::OpenObject;
+use perf_event::{
+    events::{Cache, CacheOp, CacheResult, Event, Hardware, Software, WhichCache},
+    Builder, Counter, Group,
+};
+use rand::seq::IteratorRandom;
+use scx_utils::UserExitInfo;
+use std::collections::HashMap;
+use std::fs::File;
+use std::mem::MaybeUninit;
+use std::process;
+use std::thread;
+use std::time::{Duration, Instant};
+
+const SLICE_US: u64 = 50000;
+const LOG_INTERVAL_MS: u64 = 10; // Sampling interval: one CSV row every 10 ms
+// const RESHUFFLE_ROUNDS: usize = 5; // Number of rounds before reshuffling counters
+const RESHUFFLE_ROUNDS: usize = 1; // Number of rounds before reshuffling counters
+const MAX_COUNTERS_AT_ONCE: usize = 5;
+
+type Pid = i32;
+
+pub struct BenchmarkScheduler<'a> {
+    bpf: BpfScheduler<'a>,
+    own_pid: Pid,
+    log_path: String,
+}
+
+// Represents a single measurement point in time
+struct Measurement {
+    timestamp: Instant,
+    energy: Option<f64>,
+    frequency: Option<f64>,
+    counter_values: Vec<Option<u64>>,
+}
+
+impl Measurement {
+    fn new() -> Self {
+        Self {
+            timestamp: Instant::now(),
+            energy: None,
+            frequency: None,
+            counter_values: Vec::new(),
+        }
+    }
+
+    // Take a measurement with the given counter group
+    fn take(counters: &[(String, Counter)], group: &mut Group) -> Result<Self> {
+        let mut measurement = Self::new();
+
+        // Read energy
+        // Baseline is 4.5W
+        measurement.energy = rapl::read_package_energy().ok();
+
+        // Read CPU frequency
+        measurement.frequency = read_cpu_frequency(0);
+
+        // Read performance counters
+        let counts = group.read()?;
+        // dbg!(&counts);
+
+        let counters: HashMap<_, _> = counters.iter().map(|(a, b)| (a.clone(), b)).collect();
+
+        // Extract counter values
+        for (name, _) in define_available_events() {
+            let Some(counter) = counters.get(&name) else {
+                measurement.counter_values.push(None);
+                continue;
+            };
+            measurement
+                .counter_values
+                .push(counts.get(counter).cloned());
+        }
+
+        Ok(measurement)
+    }
+
+    // Calculate the difference between two measurements
+    fn diff(&self, previous: &Measurement) -> MeasurementDiff {
+        let duration_ms = self
+            .timestamp
+            .duration_since(previous.timestamp)
+            .as_millis() as u64;
+
+        // Calculate energy delta
+        let energy_delta = match (previous.energy, self.energy) {
+            (Some(prev), Some(curr)) => curr - prev,
+            _ => 0.0,
+        };
+
+        MeasurementDiff {
+            timestamp: self.timestamp,
+            duration_ms,
+            energy_delta,
+            frequency: self.frequency,
+            counter_deltas: self.counter_values.clone(),
+        }
+    }
+}
+
+// Represents the difference between two measurements
+struct MeasurementDiff {
+    timestamp: Instant,
+    duration_ms: u64,
+    energy_delta: f64,
+    frequency: Option<f64>,
+    counter_deltas: Vec<Option<u64>>,
+}
+
+impl MeasurementDiff {
+    /// Append this diff to `writer` as one CSV row and flush it right away.
+    fn write_csv_record(&self, writer: &mut Writer<File>) -> Result<()> {
+        // NOTE(review): `timestamp.elapsed()` is the time since the sample was taken, not a run-relative or wall-clock timestamp
+        let mut record = vec![
+            self.timestamp.elapsed().as_secs_f64().to_string(),
+            self.duration_ms.to_string(),
+            self.energy_delta.to_string(),
+            self.frequency
+                .map(|f| f.to_string())
+                .unwrap_or_else(|| "N/A".to_string()),
+        ];
+        record.extend(
+            self.counter_deltas
+                .iter()
+                .map(|x| x.map(|x| x.to_string()).unwrap_or_default()),
+        );
+
+        // Write and flush the row (absent frequency was rendered as "N/A", absent counters as "")
+        writer.write_record(&record)?;
+        writer.flush()?;
+
+        Ok(())
+    }
+}
+
+impl<'a> BenchmarkScheduler<'a> {
+    pub fn init(open_object: &'a mut MaybeUninit<OpenObject>, log_path: &str) -> Result<Self> {
+        let bpf = BpfScheduler::init(
+            open_object,
+            0,     // exit_dump_len (default)
+            false, // partial (include all tasks)
+            false, // debug mode off
+        )?;
+
+        println!("Initializing benchmark scheduler (single-core profiling mode)");
+
+        Ok(Self {
+            bpf,
+            own_pid: process::id() as i32,
+            log_path: log_path.to_string(),
+        })
+    }
+
+    /// Drain every queued task from BPF, dispatch each with a fixed CPU and time slice,
+    /// then tell BPF this batch is done.
+    fn consume_all_tasks(&mut self) -> Result<()> {
+        while let Ok(Some(task)) = self.bpf.dequeue_task() {
+            let mut dispatched_task = DispatchedTask::new(&task);
+
+            // Our own process runs on core 1; every other task is scheduled on core 0,
+            // the core the measurement loop observes.
+            // NOTE(review): if `task.pid` is a per-thread id, threads we spawn will not
+            // match `own_pid` and will land on core 0 as well — confirm.
+            dispatched_task.cpu = if task.pid == self.own_pid { 1 } else { 0 };
+
+            // `slice_ns` is in nanoseconds but `SLICE_US` is in microseconds: convert, so
+            // tasks get the configured slice rather than one that is 1000x shorter.
+            dispatched_task.slice_ns = SLICE_US * 1_000;
+
+            // Dispatch the task
+            if let Err(e) = self.bpf.dispatch_task(&dispatched_task) {
+                eprintln!("Failed to dispatch task: {}", e);
+            }
+        }
+
+        // Notify BPF we're done processing tasks
+        self.bpf.notify_complete(0);
+
+        Ok(())
+    }
+
+    fn start_measurement_thread(&self) -> thread::JoinHandle<()> {
+        let log_path = self.log_path.clone();
+
+        thread::spawn(move || {
+            if let Err(e) = run_measurement_loop(log_path) {
+                eprintln!("Measurement thread error: {:?}", e);
+            }
+        })
+    }
+
+    pub fn run(&mut self) -> Result<UserExitInfo> {
+        // Start the measurement thread
+        self.start_measurement_thread();
+
+        // Main scheduling loop
+        while !self.bpf.exited() {
+            // Process all tasks
+            self.consume_all_tasks()?;
+        }
+
+        self.bpf.shutdown_and_report()
+    }
+}
+
+/// Measurement thread body: sample perf counters on core 0 together with package energy
+/// and CPU frequency, appending one CSV row every `LOG_INTERVAL_MS` milliseconds.
+///
+/// Each outer iteration opens a fresh perf group with a random subset of at most
+/// `MAX_COUNTERS_AT_ONCE` events and samples it for `RESHUFFLE_ROUNDS` intervals.
+///
+/// The loop never terminates on its own. Perf setup/read failures are reported on
+/// stderr and the affected group or round is retried or skipped, so that a transient
+/// error cannot panic the measurement thread.
+///
+/// # Errors
+///
+/// Returns an error only if the CSV log at `log_path` cannot be created or its header
+/// cannot be written.
+fn run_measurement_loop(log_path: String) -> Result<()> {
+    // Define available hardware counters
+    let available_events = define_available_events();
+
+    // Initialize CSV writer with header
+    let mut csv_writer = initialize_csv_writer(&log_path, &available_events)?;
+
+    let mut rng = rand::rng();
+
+    // Core 0 is where all tasks other than ourselves are scheduled
+    let cpu = 0;
+
+    // Main measurement loop
+    loop {
+        // Create a new perf group
+        let mut group = match Group::new_with_pid_and_cpu(-1, cpu) {
+            Ok(g) => g,
+            Err(e) => {
+                eprintln!("Failed to create perf group: {}", e);
+                thread::sleep(Duration::from_millis(100));
+                continue;
+            }
+        };
+
+        // Select random subset of counters
+        let selected_events = available_events
+            .iter()
+            .choose_multiple(&mut rng, MAX_COUNTERS_AT_ONCE);
+
+        // Build counters; an event that cannot be opened is reported and left out,
+        // which shows up as an empty field in the CSV
+        let mut counters = Vec::new();
+        for (name, event) in &selected_events {
+            match Builder::new()
+                .group(&mut group)
+                .kind(event.clone())
+                .observe_pid(-1)
+                .one_cpu(cpu.try_into().unwrap()) // `cpu` is the constant 0: cannot fail
+                .build()
+            {
+                Ok(counter) => counters.push((name.clone(), counter)),
+                Err(e) => {
+                    eprintln!("Failed to create counter for {}: {}", name, e);
+                }
+            }
+        }
+
+        // Nothing to measure: do not enable an empty group, just try another subset
+        if counters.is_empty() {
+            eprintln!("Failed to create any counters, retrying...");
+            thread::sleep(Duration::from_millis(100));
+            continue;
+        }
+
+        // Enable the counter group
+        if let Err(e) = group.enable() {
+            eprintln!("Failed to enable perf group: {}", e);
+            thread::sleep(Duration::from_millis(100));
+            continue;
+        }
+
+        // Take initial measurement
+        let mut prev_measurement = match Measurement::take(&counters, &mut group) {
+            Ok(m) => m,
+            Err(e) => {
+                eprintln!("Failed to take initial measurement: {}", e);
+                thread::sleep(Duration::from_millis(100));
+                continue;
+            }
+        };
+
+        // Monitor for several rounds before reshuffling
+        for round in 0..RESHUFFLE_ROUNDS {
+            // Zero the counters so every row holds the counts of its own interval only.
+            // A failure here skips the round rather than killing the thread.
+            if let Err(e) = group.enable().and_then(|_| group.reset()) {
+                eprintln!("Failed to reset perf group in round {}: {}", round, e);
+                continue;
+            }
+
+            // Wait for the sampling interval
+            thread::sleep(Duration::from_millis(LOG_INTERVAL_MS));
+
+            // Take current measurement
+            let curr_measurement = match Measurement::take(&counters, &mut group) {
+                Ok(m) => m,
+                Err(e) => {
+                    eprintln!("Failed to take measurement in round {}: {}", round, e);
+                    continue;
+                }
+            };
+
+            // Calculate difference and write to CSV
+            let diff = curr_measurement.diff(&prev_measurement);
+            if let Err(e) = diff.write_csv_record(&mut csv_writer) {
+                eprintln!("Failed to write CSV record: {}", e);
+            }
+
+            // Current becomes previous for next iteration
+            prev_measurement = curr_measurement;
+        }
+
+        // Best effort: the group is dropped at the end of this iteration anyway
+        let _ = group.disable();
+    }
+}
+
+fn initialize_csv_writer(
+    log_path: &str,
+    available_events: &[(String, Event)],
+) -> Result<Writer<File>> {
+    let file = File::create(log_path)?;
+    let mut csv_writer = Writer::from_writer(file);
+
+    // Write header with all possible counter names
+    let mut header = vec![
+        "timestamp".to_string(),
+        "duration_ms".to_string(),
+        "package_power_j".to_string(),
+        "cpu_frequency_mhz".to_string(),
+    ];
+
+    // Add counter deltas
+    for (name, _) in available_events {
+        header.push(name.into());
+    }
+
+    csv_writer.write_record(&header)?;
+    csv_writer.flush()?;
+
+    Ok(csv_writer)
+}
+
+/// Read the current frequency of CPU `cpu_id` from sysfs.
+///
+/// Returns the value in MHz, or `None` when the cpufreq file cannot be read or does not
+/// contain a number.
+fn read_cpu_frequency(cpu_id: u32) -> Option<f64> {
+    let sysfs_path = format!(
+        "/sys/devices/system/cpu/cpu{}/cpufreq/scaling_cur_freq",
+        cpu_id
+    );
+    let raw = std::fs::read_to_string(sysfs_path).ok()?;
+
+    // Convert from kHz to MHz
+    let khz = raw.trim().parse::<f64>().ok()?;
+    Some(khz / 1000.0)
+}
+
+fn define_available_events() -> Vec<(String, Event)> {
+    let mut events = Vec::new();
+
+    // Hardware events
+    events.extend([
+        (
+            "cpu_cycles".to_string(),
+            Event::Hardware(Hardware::CPU_CYCLES),
+        ),
+        (
+            "instructions".to_string(),
+            Event::Hardware(Hardware::INSTRUCTIONS),
+        ),
+        (
+            "cache_references".to_string(),
+            Event::Hardware(Hardware::CACHE_REFERENCES),
+        ),
+        (
+            "cache_misses".to_string(),
+            Event::Hardware(Hardware::CACHE_MISSES),
+        ),
+        (
+            "branch_instructions".to_string(),
+            Event::Hardware(Hardware::BRANCH_INSTRUCTIONS),
+        ),
+        (
+            "branch_misses".to_string(),
+            Event::Hardware(Hardware::BRANCH_MISSES),
+        ),
+        // (
+        //     "bus_cycles".to_string(),
+        //     Event::Hardware(Hardware::BUS_CYCLES),
+        // ),
+        // (
+        //     "stalled_cycles_frontend".to_string(),
+        //     Event::Hardware(Hardware::STALLED_CYCLES_FRONTEND),
+        // ),
+        // (
+        //     "stalled_cycles_backend".to_string(),
+        //     Event::Hardware(Hardware::STALLED_CYCLES_BACKEND),
+        // ),
+        (
+            "ref_cpu_cycles".to_string(),
+            Event::Hardware(Hardware::REF_CPU_CYCLES),
+        ),
+    ]);
+
+    // Software events
+    events.extend([
+        (
+            "sw_cpu_clock".to_string(),
+            Event::Software(Software::CPU_CLOCK),
+        ),
+        (
+            "sw_task_clock".to_string(),
+            Event::Software(Software::TASK_CLOCK),
+        ),
+        (
+            "sw_page_faults".to_string(),
+            Event::Software(Software::PAGE_FAULTS),
+        ),
+        (
+            "sw_context_switches".to_string(),
+            Event::Software(Software::CONTEXT_SWITCHES),
+        ),
+        (
+            "sw_cpu_migrations".to_string(),
+            Event::Software(Software::CPU_MIGRATIONS),
+        ),
+        (
+            "sw_page_faults_min".to_string(),
+            Event::Software(Software::PAGE_FAULTS_MIN),
+        ),
+        (
+            "sw_page_faults_maj".to_string(),
+            Event::Software(Software::PAGE_FAULTS_MAJ),
+        ),
+        (
+            "sw_alignment_faults".to_string(),
+            Event::Software(Software::ALIGNMENT_FAULTS),
+        ),
+        (
+            "sw_emulation_faults".to_string(),
+            Event::Software(Software::EMULATION_FAULTS),
+        ),
+    ]);
+
+    // L1 Data Cache events
+    events.extend([
+        (
+            "l1d_read_access".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::L1D,
+                operation: CacheOp::READ,
+                result: CacheResult::ACCESS,
+            }),
+        ),
+        (
+            "l1d_read_miss".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::L1D,
+                operation: CacheOp::READ,
+                result: CacheResult::MISS,
+            }),
+        ),
+        // (
+        //     "l1d_write_access".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::L1D,
+        //         operation: CacheOp::WRITE,
+        //         result: CacheResult::ACCESS,
+        //     }),
+        // ),
+        // (
+        //     "l1d_write_miss".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::L1D,
+        //         operation: CacheOp::WRITE,
+        //         result: CacheResult::MISS,
+        //     }),
+        // ),
+        // (
+        //     "l1d_prefetch_access".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::L1D,
+        //         operation: CacheOp::PREFETCH,
+        //         result: CacheResult::ACCESS,
+        //     }),
+        // ),
+        // (
+        //     "l1d_prefetch_miss".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::L1D,
+        //         operation: CacheOp::PREFETCH,
+        //         result: CacheResult::MISS,
+        //     }),
+        // ),
+    ]);
+
+    // L1 Instruction Cache events
+    events.extend([
+        (
+            "l1i_read_access".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::L1I,
+                operation: CacheOp::READ,
+                result: CacheResult::ACCESS,
+            }),
+        ),
+        (
+            "l1i_read_miss".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::L1I,
+                operation: CacheOp::READ,
+                result: CacheResult::MISS,
+            }),
+        ),
+    ]);
+
+    // Last Level Cache events
+    events.extend([
+        // (
+        //     "llc_read_access".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::LL,
+        //         operation: CacheOp::READ,
+        //         result: CacheResult::ACCESS,
+        //     }),
+        // ),
+        // (
+        //     "llc_read_miss".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::LL,
+        //         operation: CacheOp::READ,
+        //         result: CacheResult::MISS,
+        //     }),
+        // ),
+        // (
+        //     "llc_write_access".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::LL,
+        //         operation: CacheOp::WRITE,
+        //         result: CacheResult::ACCESS,
+        //     }),
+        // ),
+        // (
+        //     "llc_write_miss".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::LL,
+        //         operation: CacheOp::WRITE,
+        //         result: CacheResult::MISS,
+        //     }),
+        // ),
+        // (
+        //     "llc_prefetch_access".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::LL,
+        //         operation: CacheOp::PREFETCH,
+        //         result: CacheResult::ACCESS,
+        //     }),
+        // ),
+        // (
+        //     "llc_prefetch_miss".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::LL,
+        //         operation: CacheOp::PREFETCH,
+        //         result: CacheResult::MISS,
+        //     }),
+        // ),
+    ]);
+
+    // Data TLB events
+    events.extend([
+        (
+            "dtlb_read_access".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::DTLB,
+                operation: CacheOp::READ,
+                result: CacheResult::ACCESS,
+            }),
+        ),
+        (
+            "dtlb_read_miss".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::DTLB,
+                operation: CacheOp::READ,
+                result: CacheResult::MISS,
+            }),
+        ),
+        // (
+        //     "dtlb_write_access".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::DTLB,
+        //         operation: CacheOp::WRITE,
+        //         result: CacheResult::ACCESS,
+        //     }),
+        // ),
+        // (
+        //     "dtlb_write_miss".to_string(),
+        //     Event::Cache(Cache {
+        //         which: WhichCache::DTLB,
+        //         operation: CacheOp::WRITE,
+        //         result: CacheResult::MISS,
+        //     }),
+        // ),
+    ]);
+
+    // Instruction TLB events
+    events.extend([
+        (
+            "itlb_read_access".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::ITLB,
+                operation: CacheOp::READ,
+                result: CacheResult::ACCESS,
+            }),
+        ),
+        (
+            "itlb_read_miss".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::ITLB,
+                operation: CacheOp::READ,
+                result: CacheResult::MISS,
+            }),
+        ),
+    ]);
+
+    // Branch Prediction Unit events
+    events.extend([
+        (
+            "bpu_read_access".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::BPU,
+                operation: CacheOp::READ,
+                result: CacheResult::ACCESS,
+            }),
+        ),
+        (
+            "bpu_read_miss".to_string(),
+            Event::Cache(Cache {
+                which: WhichCache::BPU,
+                operation: CacheOp::READ,
+                result: CacheResult::MISS,
+            }),
+        ),
+    ]);
+
+    // Sort events by name for consistent ordering
+    events.sort_unstable_by_key(|(name, _)| name.clone());
+
+    events
+}
diff --git a/src/energy.rs b/src/energy.rs
index ed8b65e..a26a2b3 100644
--- a/src/energy.rs
+++ b/src/energy.rs
@@ -1,5 +1,5 @@
 mod budget;
-mod rapl;
+pub mod rapl;
 mod trackers;
 
 use std::collections::{BTreeSet, HashMap};
diff --git a/src/energy/trackers/perf.rs b/src/energy/trackers/perf.rs
index 7c26bdb..17bc693 100644
--- a/src/energy/trackers/perf.rs
+++ b/src/energy/trackers/perf.rs
@@ -24,14 +24,22 @@ static EVENT_TYPES: &[(f32, Event)] = &[
 
 impl Estimator for PerfEstimator {
     fn start_trace(&mut self, pid: u64) {
-        let Ok(mut group) = Group::new() else {
+        let Ok(mut group) = Group::new_with_pid_and_cpu(-1, 0) else {
             eprintln!("Failed to create performance counter group for PID {}", pid);
             return;
         };
 
         let counters: Result<Vec<_>, _> = EVENT_TYPES
             .iter()
-            .map(|(_, kind)| Builder::new().group(&mut group).kind(kind.clone()).build())
+            .map(|(_, kind)| {
+                Builder::new()
+                    .group(&mut group)
+                    .kind(kind.clone())
+                    // .observe_pid(pid as i32)
+                    .observe_pid(-1)
+                    .one_cpu(0)
+                    .build()
+            })
             .collect();
 
         let counters = match counters {
diff --git a/src/main.rs b/src/main.rs
index d3c9628..f3d4992 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,9 @@
 mod bpf_skel;
+use benchmark::BenchmarkScheduler;
 pub use bpf_skel::*;
 pub mod bpf_intf;
 
+mod benchmark;
 mod e_core_selector;
 mod energy;
 mod freq;
@@ -28,6 +30,14 @@ fn main() -> Result<()> {
                 .required(false),
         )
         .arg(
+            Arg::new("benchmark")
+                .short('b')
+                .long("benchmark")
+                .help("Use this flag to enable benchmarking mode")
+                .action(ArgAction::SetTrue)
+                .required(false),
+        )
+        .arg(
             Arg::new("power_cap")
                 .long("energy_cap")
                 .help("Set a power cap for the processor")
@@ -38,9 +48,16 @@ fn main() -> Result<()> {
 
     let power_cap = *matches.get_one::<u64>("power_cap").unwrap_or(&u64::MAX);
     let use_mocking = matches.get_flag("mock");
+    let benchmark = matches.get_flag("benchmark");
 
     // Initialize and load the scheduler.
     let mut open_object = MaybeUninit::uninit();
+    let log_path = "logs.csv";
+    if benchmark {
+        let mut sched = BenchmarkScheduler::init(&mut open_object, log_path)?;
+        sched.run()?; // propagate scheduler errors instead of silently discarding the Result
+        return Ok(());
+    }
     loop {
         let mut sched = Scheduler::init(&mut open_object, use_mocking, power_cap)?;
         if !sched.run()?.should_restart() {