# Step 1: Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, recall_score, precision_score, f1_score,
    confusion_matrix, roc_curve, auc
)
# Step 2: Upload the dataset
from google.colab import files
uploaded = files.upload()
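Outside Colab, files.upload() is unavailable. A fallback is to read the CSV from a local path or a mirror; the URL below is an assumption (a commonly referenced mirror), so verify it before relying on it, and skip Step 3 if you load the data this way:

# Non-Colab fallback -- URL is an assumption (commonly referenced mirror); verify before use
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
df = pd.read_csv(url, header=None)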
# Step 3: Load the dataset (the CSV has no header row, hence header=None)
df = pd.read_csv('pima-indians-diabetes.data.csv', header=None)
df.head()
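Because the file has no header row, pandas assigns integer column names. For readability you can attach the standard Pima column names (this assumes the canonical column order of the original UCI file):

# Assign the standard Pima column names (assumes the canonical column order)
df.columns = [
    "Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
    "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"
]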
# Step 4: Prepare features and labels
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
# Step 5: Split, then normalize (fit the scaler on the training set only,
# to avoid leaking test-set statistics into training)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
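The Pima dataset is imbalanced (roughly two non-diabetic samples for every diabetic one), which is worth confirming before reading too much into accuracy. A small check, plus an optional stratified split (variable names X_tr etc. are illustrative):

# Check the label distribution; heavy imbalance makes accuracy misleading
print(y.value_counts(normalize=True))

# Optional: a stratified split preserves the class ratio in train and test
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)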
# Step 6: Import models
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
models = {
    "SVM": SVC(probability=True),  # probability=True is needed for predict_proba / ROC
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(),
    "KNN": KNeighborsClassifier()
}
metrics = {}
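A single 80/20 split can be noisy on 768 rows, so a cross-validated comparison gives a more stable ranking. A rough sketch using cross_val_score on the scaled training data (for full rigor the scaler would sit inside a Pipeline so it is refit per fold):

from sklearn.model_selection import cross_val_score

# Rough 5-fold CV ranking; scores vary with fold assignment
for name, model in models.items():
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
    print(f"{name}: {cv_scores.mean():.4f} ± {cv_scores.std():.4f}")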
# Step 7: Train and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]  # positive-class probability, used for the ROC curve
    acc = accuracy_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    metrics[name] = {
        "Accuracy": acc,
        "Recall": rec,
        "Precision": prec,
        "F1 Score": f1,
        "Confusion Matrix": cm,
        "y_prob": y_prob
    }
# Step 8: Print metrics
for name in metrics:
    print(f"\n{name}")
    print(f"Accuracy:  {metrics[name]['Accuracy']:.4f}")
    print(f"Recall:    {metrics[name]['Recall']:.4f}")
    print(f"Precision: {metrics[name]['Precision']:.4f}")
    print(f"F1 Score:  {metrics[name]['F1 Score']:.4f}")
# Plot all confusion matrices
plt.figure(figsize=(14, 10))
for i, name in enumerate(metrics):
    plt.subplot(2, 2, i + 1)
    sns.heatmap(metrics[name]["Confusion Matrix"], annot=True, fmt='d', cmap="Blues")
    plt.title(f"{name} Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
plt.tight_layout()
plt.show()
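Raw counts can be hard to compare when classes are imbalanced. A minimal follow-up sketch (using SVM as the example, chosen arbitrarily) row-normalizes one matrix so each row sums to 1, which reads directly as per-class recall:

# Row-normalized confusion matrix for one model (SVM chosen arbitrarily)
cm = metrics["SVM"]["Confusion Matrix"]
cm_norm = cm / cm.sum(axis=1, keepdims=True)
plt.figure(figsize=(5, 4))
sns.heatmap(cm_norm, annot=True, fmt=".2f", cmap="Blues")
plt.title("SVM Confusion Matrix (row-normalized)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()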
# ROC curve comparison
plt.figure(figsize=(10, 6))
for name in models:
    fpr, tpr, _ = roc_curve(y_test, metrics[name]["y_prob"])
    plt.plot(fpr, tpr, label=f'{name} (AUC = {auc(fpr, tpr):.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
plt.title("ROC Curve Comparison")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.grid(True)
plt.show()
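If one model is to be carried forward, a minimal sketch that picks the winner by F1 on the test split and saves it with joblib (the filename is illustrative):

import joblib

# Select the best model by F1 score and persist it for later use
best_name = max(metrics, key=lambda n: metrics[n]["F1 Score"])
print(f"Best model by F1: {best_name}")
joblib.dump(models[best_name], "best_diabetes_model.joblib")  # illustrative filename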