You are on page 1 of 4

MACHINE

LEARNING
LAB TEST

NAMRA SHAH
I232, SAP-70411119035
MBATECH IT BATCH B

DATASET - SEEDS.CSV
PROJECT - Hierarchical clustering
DATASET VIEW
CODE –

from sklearn import metrics


def purity_score(y_true, y_pred):
    """Return the purity of a clustering with respect to reference labels.

    A contingency matrix is built from the two label vectors (rows are the
    reference classes, columns are the predicted clusters). For every
    cluster the size of its largest class is taken, these sizes are summed,
    and the sum is divided by the total number of samples.

    Args:
        y_true: Reference class label of each sample.
        y_pred: Cluster label assigned to each sample.

    Returns:
        The fraction of samples that belong to the majority class of their
        cluster.
    """
    contingency_matrix = metrics.cluster.contingency_matrix(y_true, y_pred)
    # axis=0 takes the maximum down each column, i.e. per predicted cluster.
    majority_per_cluster = np.amax(contingency_matrix, axis=0)
    return np.sum(majority_per_cluster) / np.sum(contingency_matrix)

import pandas as pd
# Load the seeds data set from the current working directory.
df = pd.read_csv("seeds.csv")
df.head(5)  # value is discarded here; only an interactive session displays it

# Columns 0-6 are the clustering inputs; column 7 is kept aside as the
# reference labels that purity_score compares the clusters against.
X = df.iloc[:, :7]
y = df.iloc[:, 7]

from sklearn.cluster import AgglomerativeClustering


import numpy as np
# Fit one 3-cluster agglomerative model per linkage criterion, in the order
# single -> complete -> average -> ward, printing the purity of each result.
labels_by_linkage = {}
for criterion in ("single", "complete", "average", "ward"):
    model = AgglomerativeClustering(linkage=criterion, n_clusters=3)
    model = model.fit(X)
    labels_by_linkage[criterion] = model.labels_
    print(purity_score(y, labels_by_linkage[criterion]))

# Expose each labelling under its own module-level name.
ymin = labels_by_linkage["single"]
ymax = labels_by_linkage["complete"]
yavg = labels_by_linkage["average"]
yward = labels_by_linkage["ward"]

from scipy.cluster.hierarchy import dendrogram, linkage


from matplotlib import pyplot as plt
# Compute the Ward-linkage merge tree for X and draw it as a dendrogram.
Z = linkage(X, method="ward")
dn = dendrogram(Z)
plt.show()

OUTPUT (USE OF JUPYTER NOTEBOOK)

--PYTHON LIBRARY

You might also like