Professional Documents
Culture Documents
June 1, 2023
# Load the dataset from a CSV file located in the notebook's working directory.
# NOTE(review): `pd` is not imported anywhere above this cell in the visible
# part of the notebook — confirm an earlier cell runs `import pandas as pd`.
df = pd.read_csv('diabetes.csv')
# Preview only the first rows instead of echoing the entire frame.
df.head()
[77]: X = df[list(df.columns[0:8])]
y = df[['Outcome']]
1
[78]: import plotly.graph_objects as go
import pandas as pd
import plotly.express as px
fig = go.Figure()
for feature in list(df.columns[0:8]):
fig.add_trace(go.Violin(y=df[feature],
name=feature,
box_visible=True,
meanline_visible=True))
fig.show()
fig.show()
# Build the decision tree with a fixed random_state and fit it in one step
# (fit() returns the estimator itself, so clf is the fitted model).
# NOTE(review): DecisionTreeClassifier and the X_train/X_test/y_train/y_test
# names are not defined in the visible part of the notebook — confirm the
# cells that create them run before this one.
clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
# Last expression of the cell: the score on X_test / y_test becomes the cell output.
clf.score(X_test, y_test)
[81]: 0.8057553956834532
[82]: display(X_test[0:8])
2
14 5 166 72 19 175 25.8
529 0 111 65 0 0 24.6
103 1 81 72 18 40 26.6
338 9 152 78 34 171 34.2
588 3 176 86 27 156 33.3
DiabetesPedigreeFunction Age
661 1.394 22
122 0.404 23
113 0.391 25
14 0.587 51
529 0.660 31
103 0.283 24
338 0.893 33
588 1.154 52
DiabetesPedigreeFunction Age 0
0 1.394 22 1
1 0.404 23 0
2 0.391 25 0
3 0.587 51 1
4 0.660 31 0
.. ... ... ..
134 0.738 41 0
135 0.665 46 1
136 0.181 29 0
137 0.344 31 1
138 0.761 27 1
3
[84]: from sklearn import tree
from matplotlib import pyplot as plt
tree.plot_tree(clf)
plt.savefig('out.pdf')
plt.show()