# You are on page 1 of 1

import pandas as pd

from sklearn.cluster import DBSCAN


from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load the dataset.
# NOTE: 'your_dataset.csv' is a placeholder path; point it at the real CSV file.
data = pd.read_csv('your_dataset.csv')

# Display the first few rows of the dataset to understand its structure
print(data.head())

# Assuming your dataset has features that you want to use for clustering,
# let's say 'feature1' and 'feature2'. Selecting with a list of column names
# keeps X as a two-column DataFrame.
X = data[['feature1', 'feature2']]

# Standardize the selected features: fit the scaler on X first, then
# transform that same X with the fitted scaler.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Initialize DBSCAN.
# You need to specify the epsilon (eps) and the minimum samples (min_samples)
# parameters. You might need to experiment with these parameters to get the
# desired clustering.
eps = 0.5  # Epsilon neighborhood radius
min_samples = 5  # Minimum number of samples in a neighborhood
dbscan = DBSCAN(eps=eps, min_samples=min_samples)

# Fit the model on the scaled features.
dbscan.fit(X_scaled)

# Extract the cluster labels and outliers (-1 indicates outliers)
labels = dbscan.labels_

# Tally the noise points, then the clusters. The noise label (-1) is not a
# cluster, so it is removed from the distinct labels before counting them.
n_noise_ = list(labels).count(-1)
n_clusters_ = len(set(labels) - {-1})

# Report both figures, one per line.
print(
    'Estimated number of clusters: %d' % n_clusters_,
    'Estimated number of noise points: %d' % n_noise_,
    sep='\n',
)

# Visualize the clusters
plt.figure(figsize=(8, 6))

# Assign a different color to each cluster, and black for noise (if present).
unique_labels = set(labels)

# A colormap called with an int treats it as an index into its lookup table,
# so consecutive ints yield near-identical colors. Sample it with floats
# spread evenly over [0, 1] instead; max(..., 1) avoids dividing by zero
# when there is only a single label.
color_steps = max(len(unique_labels) - 1, 1)
colors = [plt.cm.Spectral(i / color_steps) for i in range(len(unique_labels))]

# Iterate in sorted order so the label-to-color assignment does not depend on
# set iteration order.
for k, col in zip(sorted(unique_labels), colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    # Boolean mask selecting the rows whose label is k.
    class_member_mask = (labels == k)

    xy = X_scaled[class_member_mask]
    # c=[col] passes a single RGBA row so every point in this group shares it.
    plt.scatter(xy[:, 0], xy[:, 1], s=50, c=[col], marker='o', alpha=0.5)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

# You might also like