import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tensorflow import keras
from scipy.optimize import differential_evolution
import matplotlib.pyplot as plt
import seaborn as sns
# Load Breast Cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)
# Step 1: Feature Selection Using Neural Network
def create_nn(input_shape):
    model = keras.Sequential([
        keras.layers.Dense(16, activation='relu', input_shape=(input_shape,)),
        keras.layers.Dense(8, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
# Train Neural Network
nn_model = create_nn(X_train.shape[1])
nn_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)
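# Optional sanity check (not in the original listing): report the selection
# network's own test accuracy via Keras's standard evaluate() API, so we know
# the weights we are about to inspect come from a model that actually learned.
nn_loss, nn_accuracy = nn_model.evaluate(X_test, y_test, verbose=0)
print(f"Feature-Selection NN Test Accuracy: {nn_accuracy:.4f}")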
# Get feature importance from the neural network
# Use the absolute weights of the first layer as a proxy for feature importance
weights = np.abs(nn_model.layers[0].get_weights()[0])
feature_importance = np.sum(weights, axis=1)  # Aggregate each feature's weights across hidden units
# Select top-k features based on importance
k = 10  # Number of top features to select
top_k_features = np.argsort(feature_importance)[-k:]
# Filter dataset to include only top-k features
X_train_pruned = X_train[:, top_k_features]
X_test_pruned = X_test[:, top_k_features]
# Print number of features used
print(f"Number of Features Used: {k}")
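# Added for interpretability: map the selected indices back to feature names
# (assumes the sklearn Bunch's feature_names attribute).
print("Selected features:", list(data.feature_names[top_k_features]))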
# Step 2: Tree Pruning Using Selected Features
# Train Random Forest on pruned features
rf_model = RandomForestClassifier(n_estimators=100, max_depth=4,
                                  random_state=42)
rf_model.fit(X_train_pruned, y_train)
# Evaluate Random Forest on pruned features
y_pred = rf_model.predict(X_test_pruned)
print("Random Forest Accuracy (Pruned Features):",
      accuracy_score(y_test, y_pred))
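# Baseline sketch (not in the original listing): train the same Random Forest
# on all features, so the pruned-feature accuracy above has a reference point.
rf_model_full = RandomForestClassifier(n_estimators=100, max_depth=4,
                                       random_state=42)
rf_model_full.fit(X_train, y_train)
print("Random Forest Accuracy (All Features):",
      accuracy_score(y_test, rf_model_full.predict(X_test)))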
# Step 3: Optimize Random Forest Using Genetic Algorithm
def ga_fitness(params):
    """
    Fitness function for the Genetic Algorithm.
    Optimizes n_estimators and max_depth of the Random Forest.
    """
    n_estimators, max_depth = params
    n_estimators = int(n_estimators)  # Ensure n_estimators is an integer
    max_depth = int(max_depth)  # Ensure max_depth is an integer
    # Train a Random Forest with the given hyperparameters
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42
    )
    model.fit(X_train_pruned, y_train)
    # Evaluate on the test split (used here as the validation set)
    y_pred = model.predict(X_test_pruned)
    return -accuracy_score(y_test, y_pred)  # Minimize negative accuracy
# Define bounds for GA (n_estimators, max_depth)
bounds = [
    (10, 200),  # n_estimators range
    (2, 20)     # max_depth range
]
# Run the Genetic Algorithm (via SciPy's differential_evolution)
result = differential_evolution(ga_fitness, bounds, strategy='best1bin',
                                maxiter=5, popsize=10, tol=0.01)
# Get optimized hyperparameters
optimized_n_estimators = int(result.x[0])
optimized_max_depth = int(result.x[1])
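# Report the hyperparameters the GA converged to
print(f"Optimized n_estimators: {optimized_n_estimators}, "
      f"optimized max_depth: {optimized_max_depth}")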
# Train Random Forest with optimized hyperparameters
optimized_rf_model = RandomForestClassifier(
    n_estimators=optimized_n_estimators,
    max_depth=optimized_max_depth,
    random_state=42
)
optimized_rf_model.fit(X_train_pruned, y_train)
# Evaluate optimized Random Forest
y_pred_optimized = optimized_rf_model.predict(X_test_pruned)
print("Optimized Random Forest Accuracy (Pruned Features):",
      accuracy_score(y_test, y_pred_optimized))
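# Added sketch: ROC AUC complements accuracy for binary classification
# (assumes sklearn's roc_auc_score and the forest's predict_proba output).
from sklearn.metrics import roc_auc_score
y_proba_optimized = optimized_rf_model.predict_proba(X_test_pruned)[:, 1]
print("Optimized Random Forest ROC AUC:",
      roc_auc_score(y_test, y_proba_optimized))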
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_optimized)
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=data.target_names, yticklabels=data.target_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix (Optimized Random Forest)")
plt.show()
# Classification Report
print("\nClassification Report (Optimized Random Forest):\n",
      classification_report(y_test, y_pred_optimized,
                            target_names=data.target_names))
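# ---------------------------------------------------------------------------
# Comparison script: Random Forest on all features vs. the pruned feature subset
# ---------------------------------------------------------------------------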
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import time
# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)
# Function to evaluate model performance
def evaluate_model(model, X_train, X_test, y_train, y_test, model_name):
    """
    Evaluate the model's performance and generate visualizations.
    """
    # Measure training time
    start_time = time.time()
    model.fit(X_train, y_train)
    training_time = time.time() - start_time
    # Make predictions
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]  # For ROC curve (binary classification)
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy:.4f}")
    print(f"{model_name} Training Time: {training_time:.4f} seconds")
    # Confusion Matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(6, 4))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
                xticklabels=data.target_names, yticklabels=data.target_names)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"{model_name} Confusion Matrix")
    plt.show()
    # Classification Report
    print(f"\n{classification_report(y_test, y_pred, target_names=data.target_names)}")
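    # ROC Curve (added sketch): y_pred_proba is computed above "for ROC curve"
    # but the original listing never plots it; this fills that gap using
    # sklearn's roc_curve/auc helpers, an assumption rather than original code.
    from sklearn.metrics import roc_curve, auc
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    plt.figure(figsize=(6, 4))
    plt.plot(fpr, tpr, label=f"AUC = {auc(fpr, tpr):.3f}")
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")  # Chance-level reference
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(f"{model_name} ROC Curve")
    plt.legend()
    plt.show()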
    # Feature Importance Plot
    plt.figure(figsize=(10, 6))
    plt.bar(range(len(model.feature_importances_)), model.feature_importances_)
    plt.xlabel("Feature Index")
    plt.ylabel("Feature Importance")
    plt.title(f"{model_name} Feature Importance")
    plt.show()
# Train and evaluate Normal Random Forest model
print("Normal Random Forest Model:")
rf_model_normal = RandomForestClassifier(n_estimators=100, max_depth=4,
                                         random_state=42)
evaluate_model(rf_model_normal, X_train, X_test, y_train, y_test,
               "Normal Random Forest")
# Train and evaluate Pruned Random Forest model
print("\nPruned Random Forest Model:")
pruned_features = [7, 20, 22, 23, 27] # Significant features from pruning
X_train_pruned = X_train[:, pruned_features]
X_test_pruned = X_test[:, pruned_features]
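# Added for interpretability: show which features the hardcoded indices select
print("Pruned feature names:", list(data.feature_names[pruned_features]))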
rf_model_pruned = RandomForestClassifier(n_estimators=100, max_depth=4,
                                         random_state=42)
evaluate_model(rf_model_pruned, X_train_pruned, X_test_pruned, y_train,
               y_test, "Pruned Random Forest")
# Compare number of features
print("\nNumber of Features:")
print(f"Normal Random Forest: {X_train.shape[1]}")
print(f"Pruned Random Forest: {X_train_pruned.shape[1]}")