Practical - 2

Aim: Decision tree classification on the Iris dataset


Import Libraries

In [1]:

1 import numpy as np
2 import pandas as pd
3 from sklearn.tree import DecisionTreeClassifier

Loading the Iris.csv dataset into a pandas DataFrame

In [2]:

# Read the Iris CSV from the working directory and preview its first three rows.
data = pd.read_csv("Iris.csv")
data.head(n=3)


Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

Getting Information about data

In [3]:

data.info()

<class 'pandas.core.frame.DataFrame'>

RangeIndex: 150 entries, 0 to 149

Data columns (total 6 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 Id 150 non-null int64

1 SepalLengthCm 150 non-null float64

2 SepalWidthCm 150 non-null float64

3 PetalLengthCm 150 non-null float64

4 PetalWidthCm 150 non-null float64

5 Species 150 non-null object

dtypes: float64(4), int64(1), object(1)

memory usage: 7.2+ KB

X holds the four measurement columns (the features) and Y holds the target labels, i.e. the Species column

In [4]:

# Feature matrix: the four measurement columns as a NumPy array (Id and Species excluded).
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = data[feature_columns].to_numpy()
X[:5]


array([[5.1, 3.5, 1.4, 0.2],

[4.9, 3. , 1.4, 0.2],

[4.7, 3.2, 1.3, 0.2],

[4.6, 3.1, 1.5, 0.2],

[5. , 3.6, 1.4, 0.2]])

In [5]:

# Target vector: the species label of every row, kept as a pandas Series.
Y = data.loc[:, 'Species']
Y.head(5)


0 Iris-setosa

1 Iris-setosa

2 Iris-setosa

3 Iris-setosa

4 Iris-setosa

Name: Species, dtype: object

Training Model

In [6]:

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation. random_state pins the shuffle so the
# same rows land in the test set on every run.
X_trainset, X_testset, Y_trainset, Y_testset = train_test_split(
    X, Y, test_size=0.3, random_state=0
)

In [7]:

# Unfitted classifier: splits are chosen by entropy and the tree depth is capped at 4.
SpeciesTree = DecisionTreeClassifier(max_depth=4, criterion='entropy')
SpeciesTree


DecisionTreeClassifier(criterion='entropy', max_depth=4)

In [8]:

# Train the tree on the training split only; the test split stays unseen for evaluation.
SpeciesTree.fit(X_trainset, Y_trainset)


DecisionTreeClassifier(criterion='entropy', max_depth=4)

In [9]:

# Predict a species for every held-out row and preview the first five predictions.
predTree = SpeciesTree.predict(X_testset)
predTree[:5]


array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',

'Iris-virginica', 'Iris-setosa'], dtype=object)

In [10]:

# True labels of the first five held-out rows, for comparison with the predictions.
# .iloc makes the positional slice explicit (the index holds shuffled row ids).
Y_testset.iloc[:5]


114 Iris-virginica

62 Iris-versicolor

33 Iris-setosa

107 Iris-virginica

7 Iris-setosa

Name: Species, dtype: object

In [11]:

from sklearn import metrics
import matplotlib.pyplot as plt

# Fraction of held-out rows whose predicted species equals the true label.
test_accuracy = metrics.accuracy_score(Y_testset, predTree)
print("DecisionTrees's Accuracy: ", test_accuracy)

DecisionTrees's Accuracy: 0.9777777777777777

Visualizing the Decision Tree

In [12]:

import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Labels for the plot, as plain Python lists: some scikit-learn releases reject
# a pandas Index for feature_names. Columns 1-4 are the measurements (column 0
# is Id). Class names are sorted so they line up with the classifier's class order.
fn = data.columns[1:5].tolist()
cn = sorted(data["Species"].unique())

# Refit on ALL rows before plotting, so the drawn tree describes the full
# dataset. From here on SpeciesTree is no longer the model evaluated on the
# held-out split.
SpeciesTree.fit(X, Y)

fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 10), dpi=300)
tree.plot_tree(SpeciesTree, feature_names=fn, class_names=cn, filled=True, ax=axes)


[Text(0.5, 0.9, 'PetalLengthCm <= 2.45\nentropy = 1.585\nsamples = 150\nvalu

e = [50, 50, 50]\nclass = Iris-setosa'),

Text(0.4230769230769231, 0.7, 'entropy = 0.0\nsamples = 50\nvalue = [50, 0,

0]\nclass = Iris-setosa'),

Text(0.5769230769230769, 0.7, 'PetalWidthCm <= 1.75\nentropy = 1.0\nsamples

= 100\nvalue = [0, 50, 50]\nclass = Iris-versicolor'),

Text(0.3076923076923077, 0.5, 'PetalLengthCm <= 4.95\nentropy = 0.445\nsamp

les = 54\nvalue = [0, 49, 5]\nclass = Iris-versicolor'),

Text(0.15384615384615385, 0.3, 'PetalWidthCm <= 1.65\nentropy = 0.146\nsamp

les = 48\nvalue = [0, 47, 1]\nclass = Iris-versicolor'),

Text(0.07692307692307693, 0.1, 'entropy = 0.0\nsamples = 47\nvalue = [0, 4

7, 0]\nclass = Iris-versicolor'),

Text(0.23076923076923078, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [0, 0,

1]\nclass = Iris-virginica'),

Text(0.46153846153846156, 0.3, 'PetalWidthCm <= 1.55\nentropy = 0.918\nsamp

les = 6\nvalue = [0, 2, 4]\nclass = Iris-virginica'),

Text(0.38461538461538464, 0.1, 'entropy = 0.0\nsamples = 3\nvalue = [0, 0,

3]\nclass = Iris-virginica'),

Text(0.5384615384615384, 0.1, 'entropy = 0.918\nsamples = 3\nvalue = [0, 2,

1]\nclass = Iris-versicolor'),

Text(0.8461538461538461, 0.5, 'PetalLengthCm <= 4.85\nentropy = 0.151\nsamp

les = 46\nvalue = [0, 1, 45]\nclass = Iris-virginica'),

Text(0.7692307692307693, 0.3, 'SepalLengthCm <= 5.95\nentropy = 0.918\nsamp

les = 3\nvalue = [0, 1, 2]\nclass = Iris-virginica'),

Text(0.6923076923076923, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [0, 1,

0]\nclass = Iris-versicolor'),

Text(0.8461538461538461, 0.1, 'entropy = 0.0\nsamples = 2\nvalue = [0, 0,

2]\nclass = Iris-virginica'),

Text(0.9230769230769231, 0.3, 'entropy = 0.0\nsamples = 43\nvalue = [0, 0,

43]\nclass = Iris-virginica')]
Predicting Species for Set of Values

In [13]:

# One hypothetical flower, in training column order:
# [sepal length, sepal width, petal length, petal width] (cm).
# Note: the plotted tree above reports samples = 150 at its root, so SpeciesTree
# appears to have been refit on the full dataset by this point.
X_new = [[6.3, 3.0, 1.3, 0.2]]
predTree = SpeciesTree.predict(X_new)
predTree


array(['Iris-setosa'], dtype=object)


In [14]:

# A second hypothetical flower with a longer, wider petal
# ([sepal length, sepal width, petal length, petal width] in cm).
X_new = [[5.4, 2.8, 2.9, 1.5]]
predTree = SpeciesTree.predict(X_new)
predTree


array(['Iris-versicolor'], dtype=object)


In [15]:

# Same flower as the previous cell but with a narrow petal (0.5 cm), to see
# whether petal width alone changes the predicted species.
X_new = [[5.4, 2.8, 2.9, 0.5]]
predTree = SpeciesTree.predict(X_new)
predTree


array(['Iris-versicolor'], dtype=object)

