import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# Train and evaluate a decision-tree classifier on the drug dataset.
#
# Steps: read the CSV, integer-encode the categorical columns, hold out 20% of
# the rows for testing, fit an entropy-based decision tree on the rest, and
# print the accuracy on the held-out rows.

# Load the dataset
file_path = "drug.csv"  # Change this to your file path
df = pd.read_csv(file_path)

# Encode categorical variables
# Each column gets its own fitted encoder, stored in `label_encoders` so the
# integer codes can be mapped back to the original labels later with
# `label_encoders[col].inverse_transform(...)`.
# NOTE(review): LabelEncoder numbers labels in sorted order, which may not
# match any natural ordering of the categories; scikit-learn documents it as
# a target encoder - consider OrdinalEncoder/OneHotEncoder for the features.
label_encoders = {}
for col in ["Sex", "BP", "Cholesterol", "Drug"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Define features and target variable
# Every column except "Drug" is used as a feature; columns not encoded above
# are passed through unchanged (presumably already numeric - confirm).
X = df.drop(columns=["Drug"])
y = df["Drug"]

# Split dataset into training and testing sets (80% train, 20% test)
# A fixed random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)

# Train a Decision Tree classifier with specified parameters
# max_depth=None lets the tree grow until its stopping criteria are met.
# random_state is fixed because the tree permutes features at each split, so
# an unseeded fit can give different trees (and accuracies) between runs.
dtree = DecisionTreeClassifier(
    criterion='entropy', max_depth=None, random_state=4
)
dtree_y_pred = dtree.fit(X_train, y_train).predict(X_test)

# Calculate accuracy
# accuracy_score returns a fraction in [0, 1]; it is shown as a percentage.
dtree_acc = accuracy_score(y_test, dtree_y_pred)
print(f'Accuracy of Decision Tree Classifier: {dtree_acc * 100:.2f}%')
# (Web-page residue, not part of the script: "Comments" / "Post a Comment")