# Assignment 13 (DBSCAN)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Load the station readings from disk and peek at the first rows.
df = pd.read_csv('weather_stations.csv')
df.head()

# Discard every row that contains a missing value in any column, then
# print a summary of the remaining columns and dtypes.
df = df.dropna()
df.info()

# Columns fed to the clustering: three temperature readings, wind speed
# and precipitation.
feature_columns = [
    'Data.Temperature.Avg Temp',
    'Data.Temperature.Max Temp',
    'Data.Temperature.Min Temp',
    'Data.Wind.Speed',
    'Data.Precipitation',
]
features = df[feature_columns]

# Standardise each feature so that columns measured on different scales
# contribute comparably to DBSCAN's distance computation.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Density-based clustering; eps and min_samples are tunable.
dbscan = DBSCAN(eps=1.3, min_samples=5)
clusters = dbscan.fit_predict(scaled_features)

# Align the frame with the rows that were clustered (already the case after
# the dropna above) and attach the cluster label of each row.
df = df.loc[features.index]
df['Cluster'] = clusters

# Plot the clusters projected onto two of the clustered features
# (average vs. maximum temperature) so they can be drawn in 2-D.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=df['Data.Temperature.Avg Temp'],
    y=df['Data.Temperature.Max Temp'],
    hue=df['Cluster'],  # one colour per cluster label
    palette='tab10',
    style=(df['Cluster'] == -1),  # distinct marker for rows labelled -1
    s=100,
)  # this closing parenthesis was missing, which made the file unparsable
plt.title('DBSCAN Clustering of Weather Stations')
plt.xlabel('Average Temperature')
plt.ylabel('Max Temperature')
plt.legend(title='Cluster')
plt.show()

 

 

 

 

 

 

 

 

# Plot only the outliers: rows whose cluster label is -1.
outliers = df[df['Cluster'] == -1]

plt.figure(figsize=(10, 6))
plt.scatter(
    outliers['Data.Temperature.Avg Temp'],
    outliers['Data.Temperature.Max Temp'],
    c='red',
    label='Outliers',
    s=100,
)  # this closing parenthesis was missing, which made the file unparsable
plt.xlabel('Average Temperature')
plt.ylabel('Max Temperature')
plt.title('Outlier Weather Stations Detected by DBSCAN')
plt.legend()
plt.show()

# Comments