You are on page 1of 4

Practical - 3

AIM :- K Means Clustering On Iris Datasets

Import libraries

In [3]:

1 import numpy as np
2 import pandas as pd
3 import matplotlib.pyplot as plt
4 import seaborn as sns
5 from sklearn.model_selection import train_test_split
6 from sklearn.cluster import KMeans

import the dataset

In [4]:

1 data = pd.read_csv('iris.csv')
2 data.head(6)

Out[4]:

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

3 4 4.6 3.1 1.5 0.2 Iris-setosa

4 5 5.0 3.6 1.4 0.2 Iris-setosa

5 6 5.4 3.9 1.7 0.4 Iris-setosa

X is the selected columns

In [5]:

1 X = data[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm',


2 'PetalWidthCm']].values
3 X[0:5]

Out[5]:

array([[5.1, 3.5, 1.4, 0.2],

[4.9, 3. , 1.4, 0.2],

[4.7, 3.2, 1.3, 0.2],

[4.6, 3.1, 1.5, 0.2],

[5. , 3.6, 1.4, 0.2]])



Specifing the correct value of k selecting randomly and applying elbow
method

In [6]:

1 kmeans5 = KMeans(n_clusters=5)
2 y_kmeans5 = kmeans5.fit_predict(X)
3 print(y_kmeans5)
4
5 kmeans5.cluster_centers_

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 4 3 3 3 4 3 4 4 3 4 3 4 3 3 4 3 4 3 4 3 3

3 3 3 3 3 4 4 4 4 3 4 3 3 3 4 4 4 3 4 4 4 4 4 3 4 4 0 3 2 0 0 2 4 2 0 2 0

0 0 3 0 0 0 2 2 3 0 3 2 3 0 2 3 3 0 2 2 2 0 3 3 2 0 0 3 0 0 0 3 0 0 0 3 0

0 3]

Out[6]:

array([[6.52916667, 3.05833333, 5.50833333, 2.1625 ],

[5.006 , 3.418 , 1.464 , 0.244 ],

[7.475 , 3.125 , 6.3 , 2.05 ],

[6.20769231, 2.85384615, 4.74615385, 1.56410256],

[5.508 , 2.6 , 3.908 , 1.204 ]])

In [13]:

1 Error = []
2 for i in range(1, 11):
3 kmeans = KMeans(n_clusters=i).fit(X)
4 kmeans.fit(X)
5 Error.append(kmeans.inertia_)
6
7 import matplotlib.pyplot as plt
8
9 plt.grid()
10 plt.plot(range(1, 11), Error, 'r')
11 plt.plot(range(1, 11), Error, 'o')
12 plt.title('Elbow method')
13 plt.xlabel('No of clusters')
14 plt.ylabel('Error')
15 plt.show()
In [8]:

1 kmeans3 = KMeans(n_clusters=3)
2 y_kmeans3 = kmeans3.fit_predict(X)
3 print(y_kmeans3)
4
5 kmeans3.cluster_centers_

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 0 2 2 2 2

2 2 0 0 2 2 2 2 0 2 0 2 0 2 2 0 0 2 2 2 2 2 0 2 2 2 2 0 2 2 2 0 2 2 2 0 2

2 0]

Out[8]:

array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097],

[5.006 , 3.418 , 1.464 , 0.244 ],

[6.85 , 3.07368421, 5.74210526, 2.07105263]])

Visualizing Clustering

Clustring Sepal Length and Sepal Width

In [9]:

1 plt.scatter(X[:, 0], X[:, 1], c=y_kmeans3, cmap="rainbow")


2 SepalLength = 5.1
3 SepalWidth = 3.5
4 plt.scatter(SepalLength, SepalWidth, cmap='rainbow', marker='*')
5 plt.title('KMeans clustering')
6 plt.xlabel('SepalLength')
7 plt.ylabel('SepalWidth')

Out[9]:

Text(0, 0.5, 'SepalWidth')


Clustering Petal Length and Petal Width

In [10]:

1 plt.scatter(X[:, 2], X[:, 3], c=y_kmeans3, cmap="rainbow")


2 PetalLength = 1.4
3 PetalWidth = 0.2
4 plt.scatter(PetalLength, PetalWidth, cmap='rainbow', marker='*')
5 plt.title('KMeans clustering')
6 plt.xlabel('PetalLength')
7 plt.ylabel('PetalWidth')

Out[10]:

Text(0, 0.5, 'PetalWidth')

You might also like