1 files changed, 24 insertions, 2 deletions
diff --git a/power_predictor.py b/power_predictor.py
index 67c73ae..49e49fe 100644
--- a/power_predictor.py
+++ b/power_predictor.py
@@ -72,7 +72,7 @@ def analyze_feature_importance(X, y):
     plt.savefig('feature_importance.png')
     
     print("\nTop 5 most important features:")
-    for i, (_, row) in enumerate(feature_importance.head(5).iterrows(), 1):
+    for i, (_, row) in enumerate(feature_importance.head(15).iterrows(), 1):
         print(f"{i}. {row['Feature']} - importance: {row['Importance']:.4f}")
     
     return feature_importance
@@ -205,6 +205,26 @@ def evaluate_model(model, X, y, scaler):
     
     return mse, rmse, mae, r2
 
+def remove_outliers(X, y, threshold=3):
+    """Drop rows whose target z-score magnitude is >= ``threshold``.
+
+    ``X`` is indexed positionally via ``.iloc``; ``y`` (Series or ndarray) must
+    be row-aligned with it. Prints a removal summary; returns the kept rows.
+    """
+    values = np.asarray(y, dtype=float)
+    spread = np.nanstd(values) if values.size else 0.0
+    if spread > 0:
+        # NaN targets yield NaN z-scores, compare False, and are dropped.
+        inliers = np.abs((values - np.nanmean(values)) / spread) < threshold
+    else:
+        # Empty, constant or all-NaN target: z-scores undefined, keep all rows.
+        inliers = np.ones(values.shape, dtype=bool)
+    # ``inliers`` is a NumPy mask: ``.iloc`` rejects a boolean Series.
+    X_clean, y_clean = X.iloc[inliers], y[inliers]
+    removed = len(y) - len(y_clean)
+    print(f"Removed {removed} outliers ({removed / max(len(y), 1) * 100:.2f}% of data)")
+    return X_clean, y_clean
+
 # Step 7: Main function
 def main():
     csv_path = 'logs.csv'
@@ -212,6 +232,8 @@ def main():
     # Load and prepare data
     print("Loading data...")
     X, y = load_data(csv_path)
+
+    X, y = remove_outliers(X, y)
     
     # Impute missing values
     print("\nImputing missing values...")
@@ -223,7 +245,7 @@ def main():
     
     # Train the model
     print("\nTraining model...")
-    model, scaler = train_model(X_imputed, y, batch_size=8, epochs=200)
+    model, scaler = train_model(X_imputed, y, batch_size=8, epochs=100)
     
     # Evaluate the model
     print("\nEvaluating model...")