
Aman Agarwal

Roll No. 19
Regn. No. 201800119
7th Semester
Section C

Question: Implement a random forest supervised learning technique.


1. Submit a PDF file.
2. Perform and analyze random forest by varying the test and training set percentages.
3. Analyze random forest by varying the number of decision trees.

Solution:

A. Using Random Forest for Regression:

Dataset Used –

petrol_consumption.csv

Code:
import pandas as pd
import numpy as np

dataset = pd.read_csv('petrol_consumption.csv')

# Preparing data for training

X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

from sklearn.model_selection import train_test_split

# 80/20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Vary the test and training set percentage: 60/40 split
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.4, random_state=0)

# Feature scaling (fit each scaler on its own training split)

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

sc1 = StandardScaler()
X_train1 = sc1.fit_transform(X_train1)
X_test1 = sc1.transform(X_test1)

# Training the algorithm

from sklearn.ensemble import RandomForestRegressor

# 20 trees on the 80/20 split
regressor = RandomForestRegressor(n_estimators=20, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# 20 trees on the 60/40 split
regressor1 = RandomForestRegressor(n_estimators=20, random_state=0)
regressor1.fit(X_train1, y_train1)
y_pred1 = regressor1.predict(X_test1)

# Vary the number of decision trees: 200 trees on the 80/20 split
regressor2 = RandomForestRegressor(n_estimators=200, random_state=0)
regressor2.fit(X_train, y_train)
y_pred2 = regressor2.predict(X_test)

# 200 trees on the 60/40 split
regressor3 = RandomForestRegressor(n_estimators=200, random_state=0)
regressor3.fit(X_train1, y_train1)
y_pred3 = regressor3.predict(X_test1)

# Evaluating the algorithm

from sklearn import metrics

# 80/20 split, 20 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

# 60/40 split, 20 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test1, y_pred1))
print('Mean Squared Error:', metrics.mean_squared_error(y_test1, y_pred1))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test1, y_pred1)))

# 80/20 split, 200 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred2))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred2))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred2)))

# 60/40 split, 200 trees
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test1, y_pred3))
print('Mean Squared Error:', metrics.mean_squared_error(y_test1, y_pred3))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test1, y_pred3)))
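
The four configurations above can also be compared with a single loop, which makes it easier to add more split ratios or tree counts. The snippet below is only a sketch under the same assumptions as the code above (petrol_consumption.csv with the first four columns as features and the fifth as the target); the grid values 0.2/0.4 and 20/200 mirror the cases already run and are purely illustrative. Feature scaling is kept only to match the pipeline above; random forests do not strictly require it.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

dataset = pd.read_csv('petrol_consumption.csv')
X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

# Try every combination of split ratio and forest size and report RMSE
for test_size in [0.2, 0.4]:
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_size, random_state=0)
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    X_te = scaler.transform(X_te)
    for n_trees in [20, 200]:
        reg = RandomForestRegressor(n_estimators=n_trees, random_state=0)
        reg.fit(X_tr, y_tr)
        rmse = np.sqrt(metrics.mean_squared_error(y_te, reg.predict(X_te)))
        print('test_size =', test_size, '| n_estimators =', n_trees, '| RMSE =', round(rmse, 2))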

B. Using Random Forest for Classification:

Dataset Used –

bill_authentication.csv

Code:

import pandas as pd
import numpy as np
dataset = pd.read_csv("bill_authentication.csv")

# Prepare data for training

X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

from sklearn.model_selection import train_test_split

# 80/20 train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Vary the test and training set percentage: 60/40 split
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.4, random_state=0)

# Feature scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

sc1 = StandardScaler()
X_train1 = sc1.fit_transform(X_train1)
X_test1 = sc1.transform(X_test1)

# Training the algorithm

from sklearn.ensemble import RandomForestClassifier

# 20 trees on the 80/20 split
classifier = RandomForestClassifier(n_estimators=20, random_state=0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# 20 trees on the 60/40 split
classifier1 = RandomForestClassifier(n_estimators=20, random_state=0)
classifier1.fit(X_train1, y_train1)
y_pred1 = classifier1.predict(X_test1)

# Vary the number of decision trees: 200 trees on the 80/20 split
classifier2 = RandomForestClassifier(n_estimators=200, random_state=0)
classifier2.fit(X_train, y_train)
y_pred2 = classifier2.predict(X_test)

# 200 trees on the 60/40 split
classifier3 = RandomForestClassifier(n_estimators=200, random_state=0)
classifier3.fit(X_train1, y_train1)
y_pred3 = classifier3.predict(X_test1)

# Evaluating the Algorithm

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# 80/20 split, 20 trees
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

# 60/40 split, 20 trees
print(confusion_matrix(y_test1, y_pred1))
print(classification_report(y_test1, y_pred1))
print(accuracy_score(y_test1, y_pred1))

# 80/20 split, 200 trees
print(confusion_matrix(y_test, y_pred2))
print(classification_report(y_test, y_pred2))
print(accuracy_score(y_test, y_pred2))

# 60/40 split, 200 trees
print(confusion_matrix(y_test1, y_pred3))
print(classification_report(y_test1, y_pred3))
print(accuracy_score(y_test1, y_pred3))
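
As in the regression part, the classification experiments can be folded into one loop so the effect of the split ratio and the number of trees is visible side by side. This is just a sketch under the same assumptions as the code above (bill_authentication.csv with four feature columns and the class label in the fifth column); the grid values mirror the cases already run.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

dataset = pd.read_csv("bill_authentication.csv")
X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4].values

# Compare accuracy for every combination of split ratio and forest size
for test_size in [0.2, 0.4]:
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_size, random_state=0)
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    X_te = scaler.transform(X_te)
    for n_trees in [20, 200]:
        clf = RandomForestClassifier(n_estimators=n_trees, random_state=0)
        clf.fit(X_tr, y_tr)
        acc = accuracy_score(y_te, clf.predict(X_te))
        print('test_size =', test_size, '| n_estimators =', n_trees, '| accuracy =', round(acc, 4))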
