
Topic: CART (Classification and Regression Trees) and ID3

In [22]:

import pandas as pd
import numpy as np

# Tennis dataset (all categorical) for the classification trees
df1 = pd.read_csv('/home/c0nqu3r0r/Desktop/_Second sem/Data Mining/Dataset/Te
df1.drop('Day', axis=1, inplace=True)  # Day is only a row label, not a feature

# Position/Level/Salary dataset for the regression tree
df2 = pd.read_csv('/home/c0nqu3r0r/Desktop/_Second sem/Data Mining/Dataset/ar

In [23]:

df1.head()

Out[23]:

    Outlook Temperature Humidity    Wind PlayTennis
0     Sunny         Hot     High    Weak         No
1     Sunny         Hot     High  Strong         No
2  Overcast         Hot     High    Weak        Yes
3      Rain        Mild     High    Weak        Yes
4      Rain        Cool   Normal    Weak        Yes

In [24]:

from sklearn.preprocessing import LabelEncoder

Le = LabelEncoder()

# encode each categorical column as integer codes (labels sorted alphabetically)
df1['Outlook'] = Le.fit_transform(df1['Outlook'])
df1['Temperature'] = Le.fit_transform(df1['Temperature'])
df1['Humidity'] = Le.fit_transform(df1['Humidity'])
df1['Wind'] = Le.fit_transform(df1['Wind'])
df1['PlayTennis'] = Le.fit_transform(df1['PlayTennis'])
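
Reusing a single LabelEncoder works because fit_transform refits on every call, but it means only the last column's mapping survives in Le.classes_. If the per-column mappings are needed later (for example to decode predictions back to labels), one encoder can be kept per column. A minimal sketch of that variant of the cell above, not part of the original notebook:

encoders = {col: LabelEncoder() for col in df1.columns}  # hypothetical: one encoder per column
for col, enc in encoders.items():
    df1[col] = enc.fit_transform(df1[col])

print(encoders['Outlook'].classes_)  # e.g. ['Overcast' 'Rain' 'Sunny'] -> codes 0, 1, 2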

In [25]:

y1 = df1['PlayTennis']                 # target
x1 = df1.drop(['PlayTennis'], axis=1)  # features: Outlook, Temperature, Humidity, Wind


In [26]:

# CART classification tree: binary splits chosen by Gini impurity
from sklearn import tree

clf1 = tree.DecisionTreeClassifier(criterion='gini')
clf1 = clf1.fit(x1, y1)
tree.plot_tree(clf1)

Out[26]:

[Text(0.4444444444444444, 0.9, 'X[0] <= 0.5\ngini = 0.459\nsamples = 14\nvalue = [5, 9]'),
 Text(0.3333333333333333, 0.7, 'gini = 0.0\nsamples = 4\nvalue = [0, 4]'),
 Text(0.5555555555555556, 0.7, 'X[2] <= 0.5\ngini = 0.5\nsamples = 10\nvalue = [5, 5]'),
 Text(0.3333333333333333, 0.5, 'X[0] <= 1.5\ngini = 0.32\nsamples = 5\nvalue = [4, 1]'),
 Text(0.2222222222222222, 0.3, 'X[3] <= 0.5\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'),
 Text(0.1111111111111111, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'),
 Text(0.3333333333333333, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'),
 Text(0.4444444444444444, 0.3, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'),
 Text(0.7777777777777778, 0.5, 'X[3] <= 0.5\ngini = 0.32\nsamples = 5\nvalue = [1, 4]'),
 Text(0.6666666666666666, 0.3, 'X[0] <= 1.5\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'),
 Text(0.5555555555555556, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'),
 Text(0.7777777777777778, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'),
 Text(0.8888888888888888, 0.3, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]')]
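
plot_tree labels nodes X[0]…X[3] by column position (Outlook, Temperature, Humidity, Wind). A hedged sketch of the same plot with readable labels, plus a prediction for one hand-encoded sample; the class names and codes assume LabelEncoder's alphabetical ordering (No -> 0, Yes -> 1):

tree.plot_tree(clf1,
               feature_names=list(x1.columns),  # Outlook, Temperature, Humidity, Wind
               class_names=['No', 'Yes'],       # assumes No -> 0, Yes -> 1
               filled=True)

# Sunny/Hot/High/Weak encoded as [2, 1, 0, 1] (codes assumed from alphabetical encoding)
sample = pd.DataFrame([[2, 1, 0, 1]], columns=x1.columns)
print(clf1.predict(sample))  # expected [0], i.e. 'No', matching row 0 of df1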


In [27]:

df2.head()

Out[27]:

            Position  Level  Salary
0   Business Analyst      1   45000
1  Junior Consultant      2   50000
2  Senior Consultant      3   60000
3            Manager      4   80000
4    Country Manager      5  110000

In [28]:

x2 = df2.iloc[:, 1:2].values  # Level, sliced as a 2-D (n, 1) feature array
y2 = df2.iloc[:, 2].values    # Salary, the regression target
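
iloc[:, 1:2] uses a slice so x2 stays two-dimensional, which is the (n_samples, n_features) shape sklearn estimators expect, while iloc[:, 2] returns a flat vector, which is fine for the target. A quick check, not in the original notebook:

print(x2.shape, y2.shape)  # expected: (10, 1) (10,)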


In [29]:

# CART regression tree: splits chosen to minimise squared error in each node
from sklearn import tree

clf2 = tree.DecisionTreeRegressor()
clf2 = clf2.fit(x2, y2)
tree.plot_tree(clf2)

Out[29]:

[Text(0.703125, 0.9285714285714286, 'X[0] <= 8.5\nsquared_error = 80662250000.0\nsamples = 10\nvalue = 249500.0'),
 Text(0.53125, 0.7857142857142857, 'X[0] <= 6.5\nsquared_error = 6921484375.0\nsamples = 8\nvalue = 124375.0'),
 Text(0.375, 0.6428571428571429, 'X[0] <= 4.5\nsquared_error = 1381250000.0\nsamples = 6\nvalue = 82500.0'),
 Text(0.25, 0.5, 'X[0] <= 3.5\nsquared_error = 179687500.0\nsamples = 4\nvalue = 58750.0'),
 Text(0.1875, 0.35714285714285715, 'X[0] <= 2.5\nsquared_error = 38888888.889\nsamples = 3\nvalue = 51666.667'),
 Text(0.125, 0.21428571428571427, 'X[0] <= 1.5\nsquared_error = 6250000.0\nsamples = 2\nvalue = 47500.0'),
 Text(0.0625, 0.07142857142857142, 'squared_error = 0.0\nsamples = 1\nvalue = 45000.0'),
 Text(0.1875, 0.07142857142857142, 'squared_error = 0.0\nsamples = 1\nvalue = 50000.0'),
 Text(0.25, 0.21428571428571427, 'squared_error = 0.0\nsamples = 1\nvalue = 60000.0'),
 Text(0.3125, 0.35714285714285715, 'squared_error = 0.0\nsamples = 1\nvalue = 80000.0'),
 Text(0.5, 0.5, 'X[0] <= 5.5\nsquared_error = 400000000.0\nsamples = 2\nvalue = 130000.0'),
 Text(0.4375, 0.35714285714285715, 'squared_error = 0.0\nsamples = 1\nvalue = 110000.0'),
 Text(0.5625, 0.35714285714285715, 'squared_error = 0.0\nsamples = 1\nvalue = 150000.0'),
 Text(0.6875, 0.6428571428571429, 'X[0] <= 7.5\nsquared_error = 2500000000.0\nsamples = 2\nvalue = 250000.0'),
 Text(0.625, 0.5, 'squared_error = 0.0\nsamples = 1\nvalue = 200000.0'),
 Text(0.75, 0.5, 'squared_error = 0.0\nsamples = 1\nvalue = 300000.0'),
 Text(0.875, 0.7857142857142857, 'X[0] <= 9.5\nsquared_error = 62500000000.0\nsamples = 2\nvalue = 750000.0'),
 Text(0.8125, 0.6428571428571429, 'squared_error = 0.0\nsamples = 1\nvalue = 500000.0'),
 Text(0.9375, 0.6428571428571429, 'squared_error = 0.0\nsamples = 1\nvalue = 1000000.0')]
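
Every leaf above holds a single sample, so the regressor simply returns the salary of whichever Level bucket a query falls into. A quick hedged check: a level of 6.0 satisfies X[0] <= 8.5 and X[0] <= 6.5 but fails X[0] <= 4.5 and X[0] <= 5.5, so it should land in the 150000.0 leaf:

print(clf2.predict([[6.0]]))  # expected: [150000.]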


In [30]:

# ID3-style tree: entropy / information gain as the split criterion
from sklearn import tree

clf3 = tree.DecisionTreeClassifier(criterion='entropy')
clf3 = clf3.fit(x1, y1)
tree.plot_tree(clf3)

Out[30]:

[Text(0.4444444444444444, 0.9, 'X[0] <= 0.5\nentropy = 0.94\nsamples = 14\nvalue = [5, 9]'),
 Text(0.3333333333333333, 0.7, 'entropy = 0.0\nsamples = 4\nvalue = [0, 4]'),
 Text(0.5555555555555556, 0.7, 'X[2] <= 0.5\nentropy = 1.0\nsamples = 10\nvalue = [5, 5]'),
 Text(0.3333333333333333, 0.5, 'X[0] <= 1.5\nentropy = 0.722\nsamples = 5\nvalue = [4, 1]'),
 Text(0.2222222222222222, 0.3, 'X[3] <= 0.5\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]'),
 Text(0.1111111111111111, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [1, 0]'),
 Text(0.3333333333333333, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [0, 1]'),
 Text(0.4444444444444444, 0.3, 'entropy = 0.0\nsamples = 3\nvalue = [3, 0]'),
 Text(0.7777777777777778, 0.5, 'X[3] <= 0.5\nentropy = 0.722\nsamples = 5\nvalue = [1, 4]'),
 Text(0.6666666666666666, 0.3, 'X[0] <= 1.5\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]'),
 Text(0.5555555555555556, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [1, 0]'),
 Text(0.7777777777777778, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [0, 1]'),
 Text(0.8888888888888888, 0.3, 'entropy = 0.0\nsamples = 3\nvalue = [0, 3]')]
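
Strictly, criterion='entropy' only swaps the impurity measure: scikit-learn still grows binary CART-style trees, whereas classical ID3 makes one multiway split per categorical attribute, chosen by information gain. A small sketch of those two quantities using the standard definitions (not from the notebook):

import numpy as np

def entropy(labels):
    # H(S) = -sum_i p_i * log2(p_i) over the class proportions
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return -(p * np.log2(p)).sum()

def info_gain(feature, labels):
    # gain = H(S) - sum_v |S_v|/|S| * H(S_v), one term per feature value
    gain = entropy(labels)
    for v in np.unique(feature):
        mask = feature == v
        gain -= mask.mean() * entropy(labels[mask])
    return gain

print(round(entropy(y1), 3))                   # 0.94, the root entropy in the plot above
print(round(info_gain(x1['Outlook'], y1), 3))  # ~0.247, why Outlook is the first split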
