You are on page 1 of 4

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv('iris.csv')

df.head(5)

sepal_length sepal_width petal_length petal_width species


0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

df.shape

(150, 5)

df.describe()

sepal_length sepal_width petal_length petal_width


count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.057333 3.758000 1.199333
std 0.828066 0.435866 1.765298 0.762238
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000

df['species'].value_counts()

virginica 50
versicolor 50
setosa 50
Name: species, dtype: int64

df.isnull().sum()

sepal_length 0
sepal_width 0
petal_length 0
petal_width 0
species 0
dtype: int64

X = df.drop(['species'],axis=1)
y = df['species']

from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.3, random_state=10)

from sklearn.ensemble import RandomForestClassifier

# Build a 10-tree random forest; random_state is fixed so reruns give the
# same model. 'sqrt' is what the deprecated 'auto' setting meant for
# classifiers, and it is accepted by both old and new scikit-learn releases.
rfc = RandomForestClassifier(n_estimators=10, max_features='sqrt', random_state=10)

rfc.fit(X_train,y_train)

RandomForestClassifier(n_estimators=10, random_state=10)

rfc.score(X_train,y_train)

1.0

rfc.score(X_test, y_test)

0.9555555555555556

pred = rfc.predict(X_test)
pred

array(['versicolor', 'virginica', 'setosa', 'versicolor', 'setosa',


'versicolor', 'virginica', 'versicolor', 'setosa',
'versicolor',
'versicolor', 'virginica', 'versicolor', 'setosa', 'setosa',
'virginica', 'versicolor', 'setosa', 'setosa', 'setosa',
'virginica', 'virginica', 'virginica', 'setosa', 'versicolor',
'setosa', 'versicolor', 'versicolor', 'versicolor',
'virginica',
'versicolor', 'versicolor', 'versicolor', 'virginica',
'virginica',
'setosa', 'virginica', 'virginica', 'virginica', 'virginica',
'setosa', 'setosa', 'versicolor', 'setosa', 'versicolor'],
dtype=object)

# Evaluation helpers used by the cells below.
# NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2; on
# newer releases use ConfusionMatrixDisplay.from_estimator instead.
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    plot_confusion_matrix,
    accuracy_score,
)

print(classification_report(y_test,pred))

precision recall f1-score support

setosa 1.00 1.00 1.00 14


versicolor 0.94 0.94 0.94 17
virginica 0.93 0.93 0.93 14

accuracy 0.96 45
macro avg 0.96 0.96 0.96 45
weighted avg 0.96 0.96 0.96 45

# Plot the confusion matrix for the fitted forest. `model` is not defined in
# the visible part of this notebook; `rfc` is the classifier that produced
# `pred` for the classification report.
plot_confusion_matrix(rfc, X_test, y_test)

<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at
0x7fc90bfa7c10>

# Rank the input columns by the fitted forest's feature importances,
# largest first. `rfc` replaces `model`, which is not defined in the visible
# part of this notebook.
pd.DataFrame(
    index=X.columns,
    data=rfc.feature_importances_,
    columns=['Feature Importance'],
).sort_values('Feature Importance', ascending=False)

Feature Importance
petal_width 0.932588
petal_length 0.053115
sepal_length 0.014297
sepal_width 0.000000

from sklearn.tree import plot_tree

plt.figure(figsize=(12,8), dpi=300)
# plot_tree draws one decision tree, so show the first tree of the fitted
# forest (`model` is not defined in the visible part of this notebook).
# list(...) because some scikit-learn releases reject a pandas Index here.
plot_tree(rfc.estimators_[0], feature_names=list(X.columns), filled=True);

You might also like