You are on page 1 of 1

# Random forest regression on the data in "project.xlsx".
#
# Workflow: load the spreadsheet, take the first three columns as features
# and the next two columns as regression targets, hold out 30% of the rows
# for testing, fit a RandomForestRegressor, report error metrics on the
# held-out rows, predict one hand-entered sample, and draw a
# predicted-vs-actual scatter plot.

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

df = pd.read_excel("project.xlsx")
# A bare `df.head()` only renders in a notebook cell; print it so the
# preview is also visible when this file is run as a script.
print(df.head())

# Columns 0-2 are the model inputs; columns 3-4 are the two targets.
X = df.iloc[:, 0:3]
Yy1 = df.iloc[:, 3:5]

# The original statement was split after "=", which is a SyntaxError;
# the call is kept inside parentheses so it can span several lines.
X_train, X_test, Yy1_train, Yy1_test = train_test_split(
    X, Yy1, random_state=42, test_size=0.3
)

# --- Random forest regression ---

# max_features=1.0 (use every feature at each split) is what the removed
# 'auto' option meant for regressors, and it is accepted by both old and
# new scikit-learn releases.
# random_state is fixed so the reported metrics are reproducible from run
# to run, matching the seeded train/test split above.
rand_clf = RandomForestRegressor(
    n_estimators=1000, max_features=1.0, random_state=42
)
rand_clf.fit(X_train, Yy1_train)
yrand_pred = rand_clf.predict(X_test)

# Compute R^2 once and reuse it for both the printout and the plot title.
r2 = r2_score(Yy1_test, yrand_pred)
print('mean absolute error=', mean_absolute_error(Yy1_test, yrand_pred))
print('mean squared error=', mean_squared_error(Yy1_test, yrand_pred))
print('regression2_score=', r2)

# Predict a single hand-entered sample. Wrapping it in a DataFrame with the
# training column names avoids scikit-learn's "X does not have valid feature
# names" warning, since the model was fitted on a DataFrame.
sample = pd.DataFrame([[19.661, 123.26, 344.97]], columns=X.columns)
print(rand_clf.predict(sample))

# Predicted-vs-actual plot. The original errorbar call passed no xerr/yerr,
# so it could only draw a line joining the points in row order; a y = x
# reference line is drawn instead to show where perfect predictions fall.
actual = Yy1_test.to_numpy()
low = min(actual.min(), yrand_pred.min())
high = max(actual.max(), yrand_pred.max())

plt.figure()
plt.plot([low, high], [low, high], linestyle='--', color='gray')
plt.scatter(actual, yrand_pred)
plt.title('Random forest regression, R2=%.2f' % r2)
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.show()

You might also like