import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import (
accuracy_score, recall_score, precision_score, f1_score,
jaccard_score, confusion_matrix, roc_curve, auc
)
# Load dataset
df = pd.read_csv('samples_cancer.csv')
print(df.head())

# Step 3: Data Preprocessing
# Convert BareNuc to numeric; non-numeric placeholders such as '?' become NaN
# (errors='coerce') so that the dropna() below removes those rows.
df['BareNuc'] = pd.to_numeric(df['BareNuc'], errors='coerce')
# Drop rows with missing values
df.dropna(inplace=True)
# Drop the 'ID' column so it is not used as a feature
df.drop(columns=['ID'], inplace=True)
# Convert target: 2 = Benign (0), 4 = Malignant (1)
df['Class'] = (df['Class'] == 4).astype(int)

# Step 4: Train/Test Split and Feature Scaling
X = df.drop('Class', axis=1)
y = df['Class']
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage), which makes the reported test metrics optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # statistics learned from training rows only
X_test = scaler.transform(X_test)        # reuse the training statistics
# Whole feature matrix on the training scale; kept so the name X_scaled
# remains available to any code that reads it.
X_scaled = scaler.transform(X)
import seaborn as sns
# Step 5: Train and Evaluate SVM Models
# One SVC is fitted per kernel; its test-set metrics are stored in `results`
# and its ROC curve points in `fpr_dict` / `tpr_dict`, all keyed by kernel name.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
results = {}
fpr_dict, tpr_dict = {}, {}
for kernel in kernels:
    # probability=True is required for predict_proba(), used for the ROC curve.
    model = SVC(kernel=kernel, probability=True, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    # Column 1 holds the estimated probability of the positive class (label 1).
    y_prob = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    jaccard = jaccard_score(y_test, y_pred)
    error = 1 - acc
    cm = confusion_matrix(y_test, y_pred)

    results[kernel] = {
        'Accuracy': acc,
        'Recall': rec,
        'Precision': prec,
        'F1-Score': f1,
        'Jaccard Score': jaccard,
        'Error Rate': error,
        'Confusion Matrix': cm,
    }

    # ROC: keep the curve points so all kernels can be drawn on one plot later.
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    fpr_dict[kernel] = fpr
    tpr_dict[kernel] = tpr
import seaborn as sns
import matplotlib.pyplot as plt
# Step 6: Display Results with Confusion Matrix Heatmaps
# For every kernel: print the scalar metrics, then show its confusion matrix.
for kernel in results:
    print(f"\n Kernel: {kernel.upper()}")
    for metric, value in results[kernel].items():
        # The confusion matrix is an array, not a scalar, so it cannot be
        # printed with the ':.4f' format; it is drawn as a heatmap instead.
        if metric != "Confusion Matrix":
            print(f"{metric}: {value:.4f}")

    # Plot Confusion Matrix
    plt.figure(figsize=(5, 4))
    sns.heatmap(results[kernel]["Confusion Matrix"], annot=True, fmt='d', cmap="Blues")
    plt.title(f"{kernel.upper()} Kernel - Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.show()
# Step 7: Plot ROC Curve
# Draw the ROC curve of every kernel on a single figure for comparison.
plt.figure(figsize=(10, 6))
for kernel in kernels:
    # Compute the AUC up front: a single-quoted f-string cannot span physical
    # lines before Python 3.12, so the call is kept out of the label literal.
    roc_auc = auc(fpr_dict[kernel], tpr_dict[kernel])
    plt.plot(fpr_dict[kernel], tpr_dict[kernel], label=f'{kernel} (AUC = {roc_auc:.2f})')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve Comparison for SVM Kernels')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()