Assignment 3 (Logistic Regression)

 # Import necessary libraries 

import pandas as pd 

import numpy as np 

import matplotlib.pyplot as plt 

import seaborn as sns 

from sklearn.model_selection import train_test_split 

from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.linear_model import LogisticRegression 

from sklearn.metrics import accuracy_score, classification_report, log_loss 


df = pd.read_csv("samples_cancer.csv") 

df.replace("?", np.nan, inplace=True) 

df = df.apply(pd.to_numeric, errors="coerce") 

df.dropna(inplace=True) 
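A quick optional sanity check (not part of the original assignment) confirms that the "?" placeholders were removed and no missing values remain:

print("Shape after cleaning:", df.shape)
print("Remaining missing values:", df.isna().sum().sum())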

label_encoders = {}

for col in ['Class']:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])  # Convert category labels to integers
    label_encoders[col] = encoder

# All columns except the last are features; the last column ('Class') is the target
# (note: the first column appears to be the sample ID and is kept as a feature here)
feature_df = df[df.columns[0:-1]]

X = np.asarray(feature_df)

y = np.asarray(df[df.columns[-1]])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4) 

lr = LogisticRegression(solver='liblinear', penalty='l2', C=1.0) 

lr.fit(X_train, y_train) 
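StandardScaler is imported above but never applied. As an optional, hedged variant (lr_scaled is a new name, not part of the original assignment), standardizing the features often helps the solver converge and stops large-valued columns such as the sample ID from dominating; the rest of the notebook keeps the unscaled lr so the outputs below match what was reported:

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

lr_scaled = LogisticRegression(solver='liblinear', penalty='l2', C=1.0)
lr_scaled.fit(X_train_scaled, y_train)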

df.head(10)  # Inspect the first rows of the cleaned, encoded data (notebook display)

print("Classes after encoding:", encoder.classes_) 

 Classes after encoding: [2 4] 
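To see exactly how LabelEncoder mapped the original labels (a small optional check; in the Breast Cancer Wisconsin data, class 2 denotes benign and 4 malignant):

print({int(c): int(t) for c, t in zip(encoder.classes_, encoder.transform(encoder.classes_))})
# e.g. {2: 0, 4: 1}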

y_pred = lr.predict(X_test) 

accuracy = accuracy_score(y_test, y_pred) 

print("Logistic Regression Accuracy:", np.round(accuracy * 100, 2), "%") 

 Logistic Regression Accuracy: 65.69 % 

# Classification Report 

print("\nClassification Report:\n", classification_report(y_test, y_pred)) 

# A sample in the same column order as the training features
# (the first value is the sample ID, which was kept as a feature above)
features = [[1000025, 5, 1, 1, 1, 2, 1, 3, 1, 1]]

predicted_category = lr.predict(features)

print("Predicted tumor for a patient:",
      label_encoders["Class"].inverse_transform(predicted_category))

 Predicted tumor for a patient: [2] 
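Since class 2 is benign and 4 malignant in this dataset, the prediction reads as benign. An optional follow-up shows how confident the model is for this sample:

probs = lr.predict_proba(features)
print("Class probabilities (order matches encoder.classes_ = [2 4]):", np.round(probs, 3))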

y_train_prob = lr.predict_proba(X_train) 

y_test_prob = lr.predict_proba(X_test) 

train_log_loss = log_loss(y_train, y_train_prob) 

test_log_loss = log_loss(y_test, y_test_prob) 

print(f"Training Log Loss: {train_log_loss:.4f}") 

print(f"Testing Log Loss: {test_log_loss:.4f}") 

 Training Log Loss: 0.6432 

 Testing Log Loss: 0.6307 
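Both losses sit close to what a prior-only predictor achieves on a roughly 65/35 class split, so it is worth checking the model against a trivial baseline (a hedged sketch using sklearn's DummyClassifier, not part of the original assignment):

from sklearn.dummy import DummyClassifier

baseline = DummyClassifier(strategy="prior").fit(X_train, y_train)
baseline_loss = log_loss(y_test, baseline.predict_proba(X_test))
print(f"Baseline (class-prior) Log Loss: {baseline_loss:.4f}")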


# Probability of the positive class (predict_proba returns one column per class)
y_train_prob_pos = y_train_prob[:, 1]

y_test_prob_pos = y_test_prob[:, 1]

# Function to calculate binary log-loss, clipping probabilities to avoid log(0)
def compute_log_loss(y_true, y_pred):
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

# Store log-loss at each iteration during training
# (renamed so the lists do not shadow the scalar losses printed above)
train_log_loss_curve = []
test_log_loss_curve = []

for i in range(1, 1001):  # Replace 1000 with your actual number of iterations
    # Record the log-loss for the training and test sets at each iteration
    train_log_loss_curve.append(compute_log_loss(y_train, y_train_prob_pos))
    test_log_loss_curve.append(compute_log_loss(y_test, y_test_prob_pos))
    # Update your model parameters here for each iteration (this part depends
    # on your implementation; as written, the probabilities are fixed, so the
    # curve below is a flat line)

# Plotting Log-Loss Curve 

epochs = np.arange(1, len(train_log_loss_curve) + 1)

plt.figure(figsize=(7, 5)) 

plt.plot(epochs, train_log_loss_curve, label="Train Log-Loss", color="blue")

plt.plot(epochs, test_log_loss_curve, label="Test Log-Loss", color="red")

plt.xlabel("Iterations") 

plt.ylabel("Log-Loss") 

plt.title("Log-Loss Curve for Logistic Regression") 

plt.legend() 

plt.show() 
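Because the loop above records fixed probabilities, the plotted curve is flat. To obtain a genuine per-iteration curve, one option (an assumption, not the assignment's prescribed method) is incremental training with sklearn's SGDClassifier, evaluating log_loss after every epoch; features are standardized first to keep SGD numerically stable:

from sklearn.linear_model import SGDClassifier

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

sgd = SGDClassifier(loss="log_loss", learning_rate="constant", eta0=0.01, random_state=4)
train_curve, test_curve = [], []
for epoch in range(100):
    # One pass of SGD over the training data, then record both losses
    sgd.partial_fit(X_train_s, y_train, classes=np.unique(y_train))
    train_curve.append(log_loss(y_train, sgd.predict_proba(X_train_s)))
    test_curve.append(log_loss(y_test, sgd.predict_proba(X_test_s)))

plt.figure(figsize=(7, 5))
plt.plot(range(1, 101), train_curve, label="Train Log-Loss", color="blue")
plt.plot(range(1, 101), test_curve, label="Test Log-Loss", color="red")
plt.xlabel("Epoch")
plt.ylabel("Log-Loss")
plt.title("Per-Epoch Log-Loss with SGD Training")
plt.legend()
plt.show()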
