diff options
Diffstat (limited to 'power_predictor.py')
-rw-r--r-- | power_predictor.py | 26 |
1 files changed, 24 insertions, 2 deletions
diff --git a/power_predictor.py b/power_predictor.py
index 67c73ae..49e49fe 100644
--- a/power_predictor.py
+++ b/power_predictor.py
@@ -72,7 +72,7 @@ def analyze_feature_importance(X, y):
     plt.savefig('feature_importance.png')
 
     print("\nTop 5 most important features:")
-    for i, (_, row) in enumerate(feature_importance.head(5).iterrows(), 1):
+    for i, (_, row) in enumerate(feature_importance.head(15).iterrows(), 1):
         print(f"{i}. {row['Feature']} - importance: {row['Importance']:.4f}")
 
     return feature_importance
@@ -205,6 +205,26 @@ def evaluate_model(model, X, y, scaler):
 
     return mse, rmse, mae, r2
 
+def remove_outliers(X, y, threshold=3):
+    """Drop rows whose target z-score is not below ``threshold``.
+
+    X is a DataFrame, y an equally long Series/array. Prints how many rows
+    were removed and returns the filtered ``(X_clean, y_clean)`` pair.
+    """
+    if len(y) == 0:  # nothing to score; also keeps the percentage finite
+        return X, y
+    # Plain float array so Series and ndarray targets behave the same
+    target = np.asarray(y, dtype=float)
+    spread = np.nanstd(target)
+    # A constant target has zero spread: divide by 1 so no row is dropped
+    z_scores = np.abs((target - np.nanmean(target)) / (spread or 1.0))
+    # Positional ndarray mask; .iloc rejects an index-carrying Series mask
+    inliers = z_scores < threshold
+    X_clean, y_clean = X.iloc[inliers], y[inliers]
+    removed = len(y) - len(y_clean)
+    print(f"Removed {removed} outliers ({removed/len(y)*100:.2f}% of data)")
+    return X_clean, y_clean
+
 # Step 7: Main function
 def main():
     csv_path = 'logs.csv'
@@ -212,6 +232,8 @@ def main():
     # Load and prepare data
     print("Loading data...")
     X, y = load_data(csv_path)
+
+    X, y = remove_outliers(X, y)
 
     # Impute missing values
     print("\nImputing missing values...")
@@ -223,7 +245,7 @@ def main():
 
     # Train the model
     print("\nTraining model...")
-    model, scaler = train_model(X_imputed, y, batch_size=8, epochs=200)
+    model, scaler = train_model(X_imputed, y, batch_size=8, epochs=100)
 
     # Evaluate the model
     print("\nEvaluating model...")