You are on page 1 of 6

Random Forest

April 21, 2022

1 Name-Tanmay Mehrotra

2 Regno-20BCE2251

3 Implement the Random Forest Trees algorithm and test the algorithm
using any data set of your choice. The output should include Accuracy,
Error rate, Precision and Recall rate along with the confusion matrix.
[19]: import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
# Load the student-scores dataset from a CSV file in the working directory.
p=pd.read_csv('student_scores.csv')
# NOTE(review): pd.read_csv should already return a DataFrame, so this wrapper
# looks redundant — confirm before removing.
d=pd.DataFrame(p)

[20]: x=d.iloc[:,:-1]
y=d.iloc[:,-1]
x
y

[20]: 0 21
1 47
2 27
3 75
4 30
5 20
6 88
7 60
8 81
9 25
10 85
11 62
12 41
13 42
14 17

1
15 95
16 30
17 24
18 67
19 69
20 30
21 54
22 35
23 76
24 86
Name: Scores, dtype: int64

[21]: from sklearn.model_selection import train_test_split


# Hold out 40% of the rows as the test split.
# NOTE(review): no random_state is passed, so the split (and every metric
# computed from it) presumably changes from run to run — confirm, and pin a
# seed if the reported numbers need to be reproducible.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.4)

[22]: from sklearn.preprocessing import StandardScaler


# Scale the features: the scaler is fitted on the training split only and the
# same fitted scaler is then applied to the test split.
s=StandardScaler()
x_train=s.fit_transform(x_train)
x_test=s.transform(x_test)
from sklearn.ensemble import RandomForestClassifier
# Random forest with 10 trees and the "entropy" split criterion.
# NOTE(review): the target `y` is the raw `Scores` column (int64, mostly
# distinct values in the listing above), so the classifier is asked to treat
# each individual score as its own class. That is consistent with the 0.0
# accuracy reported further down. Consider binning the scores into a few
# categories, or using a regressor with regression metrics — TODO confirm the
# intended task.
c=RandomForestClassifier(n_estimators=10,criterion="entropy")
# The result of fit() is kept in p1; the later score() call goes through p1.
p1=c.fit(x_train,y_train)
# Predict a label for every held-out row; the bare name displays the array.
y_pred=c.predict(x_test)
y_pred

[22]: array([47, 86, 69, 47, 86, 24, 67, 30, 47, 95], dtype=int64)

[23]: p1.score(x_test,y_test)

[23]: 0.0

[24]: from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score

con=confusion_matrix(y_test,y_pred)
con

[24]: array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],

2
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int64)

[25]: accuracy_score(y_test,y_pred)

[25]: 0.0

[26]: recall_score(y_test,y_pred,average=None,zero_division=1)

[26]: array([0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1.])

[27]: precision_score(y_test,y_pred,average=None,zero_division=1)

[27]: array([1., 0., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.])

[28]: from sklearn.metrics import mean_squared_error


mean_squared_error(y_test,y_pred)

[28]: 79.5

[29]: import math


math.sqrt(mean_squared_error(y_test,y_pred))

[29]: 8.916277250063503

[ ]:

3
Adaboost2

April 21, 2022

1 Name-Tanmay Mehrotra

2 Regno-20BCE2251

3 Implement the AdaBoost algorithm and test the algorithm using any
data set of your choice. The output should include Accuracy, Error rate,
Precision and Recall rate along with the confusion matrix.
[1]: import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
p=pd.read_csv('Salary_Data.csv')
d=pd.DataFrame(p)
x=d.iloc[:,:-1]
y=d.iloc[:,-1]
x
y

[1]: 0 39343.0
1 46205.0
2 37731.0
3 43525.0
4 39891.0
5 56642.0
6 60150.0
7 54445.0
8 64445.0
9 57189.0
10 63218.0
11 55794.0
12 56957.0
13 57081.0
14 61111.0
15 67938.0

1
16 66029.0
17 83088.0
18 81363.0
19 93940.0
20 91738.0
21 98273.0
22 101302.0
23 113812.0
24 109431.0
25 105582.0
26 116969.0
27 112635.0
28 122391.0
29 121872.0
Name: Salary, dtype: float64

[8]: from sklearn.model_selection import train_test_split


# Hold out 33% of the rows as the test split.
# NOTE(review): no random_state is passed, so the split presumably changes from
# run to run — confirm, and pin a seed if the metrics must be reproducible.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.33)
from sklearn.preprocessing import StandardScaler
# Scale the features: fit the scaler on the training split only, then reuse
# the fitted scaler on the test split.
s=StandardScaler()
x_train=s.fit_transform(x_train)
x_test=s.transform(x_test)
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost classifier with library-default settings.
# NOTE(review): the target `y` is the raw `Salary` column (float64, every value
# distinct in the listing above), so each salary becomes its own class. That is
# consistent with the 0.0 accuracy reported further down. Consider binning
# salaries into bands, or using a regressor — TODO confirm the intended task.
c=AdaBoostClassifier()
# The result of fit() is kept in i and used for prediction.
i=c.fit(x_train,y_train)
# Predict a label for every held-out row; the bare name displays the array.
y_pred=i.predict(x_test)
y_pred

[8]: array([101302., 55794., 64445., 66029., 55794., 101302., 43525.,


43525., 43525., 121872.])

[9]: from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score

con=confusion_matrix(y_test,y_pred)
con

[9]: array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],


[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],

2
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]], dtype=int64)

[10]: accuracy_score(y_test,y_pred)

[10]: 0.0

[11]: recall_score(y_test,y_pred,average=None,zero_division=1)

[11]: array([0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0.])

[12]: precision_score(y_test,y_pred,average=None,zero_division=1)

[12]: array([1., 1., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1.])

[13]: from sklearn.metrics import mean_squared_error


mean_squared_error(y_test,y_pred)

[13]: 116912588.7

[14]: import math


math.sqrt(mean_squared_error(y_test,y_pred))

[14]: 10812.612482651915

You might also like