
Aman Agarwal

Roll No. 19
Regn. No. 201800119
7th Semester
Section C

Question: Implement a random forest supervised learning technique.


1. Submit a PDF file.
2. Perform and analyze random forest by varying the test and training set percentages.
3. Analyze random forest by varying the number of decision trees.

Solution:

A. Using Random Forest for Regression:

Dataset Used –

petrol_consumption.csv

Code:
import pandas as pd
import numpy as np

dataset = pd.read_csv('petrol_consumption.csv')

# Preparing data for training

X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

from sklearn.model_selection import train_test_split

# 80/20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Vary the test and training set percentage: 60/40 split
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.4, random_state=0)

# Feature scaling (fit each scaler on its own training split)

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

sc1 = StandardScaler()
X_train1 = sc1.fit_transform(X_train1)
X_test1 = sc1.transform(X_test1)

# Training the algorithm

from sklearn.ensemble import RandomForestRegressor

# 20 trees on the 80/20 split
regressor = RandomForestRegressor(n_estimators=20, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# 20 trees on the 60/40 split
regressor1 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor1.fit(X_train1, y_train1)
y_pred1 = regressor1.predict(X_test1)

# Vary the number of decision trees: 200 trees on the 80/20 split
regressor2 = RandomForestRegressor(n_estimators=200, random_state=0)
regressor2.fit(X_train, y_train)
y_pred2 = regressor2.predict(X_test)

# 200 trees on the 60/40 split
regressor3 = RandomForestRegressor(n_estimators=200, random_state=0)
regressor3.fit(X_train1, y_train1)
y_pred3 = regressor3.predict(X_test1)

# Evaluating the algorithm

from sklearn import metrics

# 80/20 split, 20 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

# 60/40 split, 20 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test1, y_pred1))
print('Mean Squared Error:', metrics.mean_squared_error(y_test1, y_pred1))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test1, y_pred1)))

# 80/20 split, 200 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred2))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred2))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred2)))

# 60/40 split, 200 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test1, y_pred3))
print('Mean Squared Error:', metrics.mean_squared_error(y_test1, y_pred3))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test1, y_pred3)))
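
The four configurations above can also be compared with a single loop, which makes it easier to add more split ratios or tree counts. The snippet below is only a sketch under the same assumptions as the code above (petrol_consumption.csv with the first four columns as features and the fifth as the target); the grid values 0.2/0.4 and 20/200 mirror the cases already run and are purely illustrative. Feature scaling is kept only to match the pipeline above; random forests do not strictly require it.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

dataset = pd.read_csv('petrol_consumption.csv')
X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

# Try every combination of split ratio and forest size and report RMSE
for test_size in [0.2, 0.4]:
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_size, random_state=0)
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    X_te = scaler.transform(X_te)
    for n_trees in [20, 200]:
        reg = RandomForestRegressor(n_estimators=n_trees, random_state=0)
        reg.fit(X_tr, y_tr)
        rmse = np.sqrt(metrics.mean_squared_error(y_te, reg.predict(X_te)))
        print('test_size =', test_size, '| n_estimators =', n_trees, '| RMSE =', round(rmse, 2))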

B. Using Random Forest for Classification:

Dataset Used –

bill_authentication.csv

Code:

import pandas as pd
import numpy as np
dataset = pd.read_csv("bill_authentication.csv")

# Prepare data for training

X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

from sklearn.model_selection import train_test_split

# 80/20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Vary the test and training set percentage: 60/40 split
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.4, random_state=0)

# Feature scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

sc1 = StandardScaler()
X_train1 = sc1.fit_transform(X_train1)
X_test1 = sc1.transform(X_test1)

# Training the algorithm

from sklearn.ensemble import RandomForestClassifier

# 20 trees on the 80/20 split
classifier = RandomForestClassifier(n_estimators=20, random_state=0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# 20 trees on the 60/40 split
classifier1 = RandomForestClassifier(n_estimators=20, random_state=0)
classifier1.fit(X_train1, y_train1)
y_pred1 = classifier1.predict(X_test1)

# Vary the number of decision trees: 200 trees on the 80/20 split
classifier2 = RandomForestClassifier(n_estimators=200, random_state=0)
classifier2.fit(X_train, y_train)
y_pred2 = classifier2.predict(X_test)

# 200 trees on the 60/40 split
classifier3 = RandomForestClassifier(n_estimators=200, random_state=0)
classifier3.fit(X_train1, y_train1)
y_pred3 = classifier3.predict(X_test1)

# Evaluating the Algorithm

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# 80/20 split, 20 trees
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

# 60/40 split, 20 trees
print(confusion_matrix(y_test1, y_pred1))
print(classification_report(y_test1, y_pred1))
print(accuracy_score(y_test1, y_pred1))

# 80/20 split, 200 trees
print(confusion_matrix(y_test, y_pred2))
print(classification_report(y_test, y_pred2))
print(accuracy_score(y_test, y_pred2))

# 60/40 split, 200 trees
print(confusion_matrix(y_test1, y_pred3))
print(classification_report(y_test1, y_pred3))
print(accuracy_score(y_test1, y_pred3))
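
As in the regression part, the classification experiments can be folded into one loop so the effect of the split ratio and the number of trees is visible side by side. This is just a sketch under the same assumptions as the code above (bill_authentication.csv with four feature columns and the class label in the fifth column); the grid values mirror the cases already run.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

dataset = pd.read_csv("bill_authentication.csv")
X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

# Compare accuracy for every combination of split ratio and forest size
for test_size in [0.2, 0.4]:
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_size, random_state=0)
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    X_te = scaler.transform(X_te)
    for n_trees in [20, 200]:
        clf = RandomForestClassifier(n_estimators=n_trees, random_state=0)
        clf.fit(X_tr, y_tr)
        acc = accuracy_score(y_te, clf.predict(X_te))
        print('test_size =', test_size, '| n_estimators =', n_trees, '| accuracy =', round(acc, 4))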
