Professional Documents
Culture Documents
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the iris data from a CSV file given by a relative path.
df = pd.read_csv('iris.csv')
# Preview the first five rows.
df.head(5)
# (rows, columns) of the loaded frame.
df.shape
(150, 5)
# Summary statistics for the columns of df.
df.describe()
# Number of rows per species label, to check how balanced the classes are.
df['species'].value_counts()
virginica 50
versicolor 50
setosa 50
Name: species, dtype: int64
# Count missing values in each column.
df.isnull().sum()
sepal_length 0
sepal_width 0
petal_length 0
petal_width 0
species 0
dtype: int64
# Feature matrix: every column of df except the class label.
X = df.drop(columns=['species'])
# Target vector: the class label column on its own.
y = df['species']
# Random forest of 10 trees with a fixed random_state.
# The original statement was hard-wrapped in the middle of `random_state`,
# which is a syntax error; it is rejoined here into one valid call.
# max_features='sqrt' replaces 'auto': for classifiers 'auto' was an alias of
# 'sqrt', and scikit-learn deprecated it in 1.1 and removed it in 1.3.
# NOTE(review): no import of RandomForestClassifier is visible in this file —
# presumably `from sklearn.ensemble import RandomForestClassifier`; confirm.
rfc = RandomForestClassifier(
    n_estimators=10,
    max_features='sqrt',
    random_state=10,
)
# Fit the forest on the training split.
# NOTE(review): X_train / y_train (and X_test / y_test used further down) are
# not defined anywhere in the visible code — presumably a train/test split
# step is missing from this file; confirm before running.
rfc.fit(X_train,y_train)
RandomForestClassifier(n_estimators=10, random_state=10)
# Score the fitted model on the same data it was trained on.
rfc.score(X_train,y_train)
1.0
# Score the fitted model on the test split.
rfc.score(X_test, y_test)
0.9555555555555556
# Predict labels for the test split.
pred = rfc.predict(X_test)
# Bare expression: shows the predictions when run as a notebook cell.
pred
# Report comparing the predictions against the true test labels.
# NOTE(review): classification_report is not imported in the visible code —
# presumably `from sklearn.metrics import classification_report`; confirm.
print(classification_report(y_test,pred))
accuracy 0.96 45
macro avg 0.96 0.96 0.96 45
weighted avg 0.96 0.96 0.96 45
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x7fc90bfa7c10>
Feature Importance
petal_width 0.932588
petal_length 0.053115
sepal_length 0.014297
sepal_width 0.000000
# Create a 12x8 inch figure at 300 dpi for the tree plot.
plt.figure(figsize=(12,8), dpi=300)
# Draw the tree with filled nodes, labelling splits with the feature column names.
# NOTE(review): neither `model` nor `plot_tree` is defined or imported in the
# visible code, and `model` is not the `rfc` forest fitted above — presumably
# a separate tree estimator from a missing step; confirm which one is meant.
# The trailing semicolon is kept as written (presumably to suppress the
# notebook's echo of the return value).
plot_tree(model, feature_names=X.columns,filled=True);