import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
# Load dataset
df = pd.read_csv('Vehicle_dataset.csv')

# Step 1: Load & Clean Data
# The literal string '$null$' is treated as a missing-value marker: it is
# turned into NaN, and then every row containing a NaN in ANY column is dropped.
df.replace('$null$', np.nan, inplace=True)
df.dropna(inplace=True)

# Convert necessary columns to numeric
# NOTE: astype(float) raises if any remaining value in these columns cannot be
# parsed as a number, so bad data fails loudly here rather than being ignored.
cols_to_convert = [
    'engine_s', 'horsepow', 'wheelbas', 'width', 'length',
    'curb_wgt', 'fuel_cap', 'mpg',
]
df[cols_to_convert] = df[cols_to_convert].astype(float)

# Step 2: Select Features for Clustering
features = df[cols_to_convert]

# Step 3: Standardize Features
# The columns are rescaled before clustering so that no single column
# dominates the pairwise distances purely because of its magnitude.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Step 4 & 5: Dendrograms with 3 Linkage Types
linkage_methods = ['single', 'complete', 'average']

# One leaf label per remaining row: "<manufact> <model>". Built after the
# dropna above, so its length matches the rows of scaled_features.
labels = df['manufact'] + ' ' + df['model']

# Draw one figure per linkage method. Everything below must be inside the loop
# body; otherwise only a single (or no) dendrogram would be produced.
for method in linkage_methods:
    plt.figure(figsize=(12, 5))
    # Z is the hierarchical-clustering linkage matrix for this method.
    Z = linkage(scaled_features, method=method)
    dendrogram(Z, labels=labels.values, leaf_rotation=90)
    plt.title(f'Dendrogram - {method.capitalize()} Linkage')
    plt.xlabel('Car Model')
    plt.ylabel('Distance')
    plt.tight_layout()
    plt.grid(True)
    plt.show()
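# NOTE: the text "Comments" / "Post a Comment" that followed the script here
# was residue from the web page it was copied from; it is not part of the program.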