summary | refs | log | tree | commit | diff
path: root/power_predictor.py
diff options
context:
space:
mode:
Diffstat (limited to 'power_predictor.py')
-rw-r--r-- power_predictor.py 26
1 files changed, 24 insertions, 2 deletions
diff --git a/power_predictor.py b/power_predictor.py
index 67c73ae..49e49fe 100644
--- a/power_predictor.py
+++ b/power_predictor.py
@@ -72,7 +72,7 @@ def analyze_feature_importance(X, y):
plt.savefig('feature_importance.png')
print("\nTop 5 most important features:")
- for i, (_, row) in enumerate(feature_importance.head(5).iterrows(), 1):
+ for i, (_, row) in enumerate(feature_importance.head(15).iterrows(), 1):
print(f"{i}. {row['Feature']} - importance: {row['Importance']:.4f}")
return feature_importance
@@ -205,6 +205,26 @@ def evaluate_model(model, X, y, scaler):
return mse, rmse, mae, r2
def remove_outliers(X, y, threshold=3):
    """Drop samples whose target value is a z-score outlier.

    A sample is kept when the absolute z-score of its target is strictly
    below ``threshold``. The z-score uses the mean and population standard
    deviation (ddof=0) of the finite target values, so the result is the same
    whether ``y`` is a pandas Series or a NumPy array.

    Args:
        X: Feature table with one row per sample. A pandas object is filtered
            positionally via ``.iloc``; anything else is converted with
            ``np.asarray`` and filtered with a boolean mask.
        y: One-dimensional numeric targets, aligned with ``X`` by position.
        threshold: Maximum absolute z-score (exclusive) for a sample to count
            as an inlier.

    Returns:
        Tuple ``(X_clean, y_clean)`` holding only the inlier rows. Pandas
        inputs keep their type and original index labels.

    Raises:
        ValueError: If ``X`` and ``y`` have different lengths.
    """
    y_values = np.asarray(y, dtype=float)
    total = len(y_values)
    if len(X) != total:
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {total}"
        )

    # NaN/inf targets can never satisfy ``z < threshold``; exclude them up
    # front so they do not poison the mean and standard deviation either.
    finite = np.isfinite(y_values)
    inliers = finite.copy()

    if finite.any():
        finite_targets = y_values[finite]
        spread = finite_targets.std()
        # A constant target has zero spread: nothing is an outlier, and
        # dividing by zero would turn every z-score into NaN.
        if spread > 0:
            z_scores = np.abs((finite_targets - finite_targets.mean()) / spread)
            inliers[finite] = z_scores < threshold

    # ``inliers`` is a plain NumPy boolean array: ``.iloc`` accepts that but
    # rejects a boolean Series, and positional filtering is independent of
    # whatever index labels X and y carry.
    X_clean = X.iloc[inliers] if hasattr(X, "iloc") else np.asarray(X)[inliers]
    y_clean = y.iloc[inliers] if hasattr(y, "iloc") else np.asarray(y)[inliers]

    removed = total - int(inliers.sum())
    percent = removed / total * 100 if total else 0.0
    print(f"Removed {removed} outliers ({percent:.2f}% of data)")

    return X_clean, y_clean
+
# Step 7: Main function
def main():
csv_path = 'logs.csv'
@@ -212,6 +232,8 @@ def main():
# Load and prepare data
print("Loading data...")
X, y = load_data(csv_path)
+
+ X, y = remove_outliers(X, y)
# Impute missing values
print("\nImputing missing values...")
@@ -223,7 +245,7 @@ def main():
# Train the model
print("\nTraining model...")
- model, scaler = train_model(X_imputed, y, batch_size=8, epochs=200)
+ model, scaler = train_model(X_imputed, y, batch_size=8, epochs=100)
# Evaluate the model
print("\nEvaluating model...")