Professional Documents
Culture Documents
ipynb - Colaboratory
Mounted at /content/drive
df.head()
0 6 148 72 35 0 33.6
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
df.describe()
df.isnull().values.any()
False
# Columns named as not allowed to be zero; the list is not used in the lines shown here.
zero_not_allowed = ["Glucose","BloodPressure","SkinThickness"]
# Features are every column except the last; the last column is the target.
# Use `:-1`, not `:-2`: a `:-2` slice would also discard the second-to-last
# column (Age, per the df.info() column listing) from the feature matrix.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0, test_size=0.2)
# RBF-kernel support vector classifier fitted on the training split.
clf = svm.SVC(kernel='rbf')
clf.fit(x_train, y_train)
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 1/8
6/27/23, 4:21 PM SVM_RF_Diabetes_CSV_26/6/2023.ipynb - Colaboratory
y_pred = clf.predict(x_test)
Accuracy: 0.7922077922077922
confusion_matrix(y_test,y_pred)
array([[98, 9],
[23, 24]])
sbs.set_theme()
# %matplotlib qt
%matplotlib inline
0 6 148 72 35 0 33.6
1 1 85 66 29 0 26.6
2 8 183 64 0 0 23.3
3 1 89 66 23 94 28.1
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Pregnancies 768 non-null int64
1 Glucose 768 non-null int64
2 BloodPressure 768 non-null int64
3 SkinThickness 768 non-null int64
4 Insulin 768 non-null int64
5 BMI 768 non-null float64
6 DiabetesPedigreeFunction 768 non-null float64
7 Age 768 non-null int64
8 Outcome 768 non-null int64
dtypes: float64(2), int64(7)
memory usage: 54.1 KB
df.describe()
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 2/8
6/27/23, 4:21 PM SVM_RF_Diabetes_CSV_26/6/2023.ipynb - Colaboratory
# Row counts of the Healthy and Diabetic subsets, labelled for plotting.
h_diab = pd.Series(
    data=[Healthy.shape[0], Diabetic.shape[0]],
    index=['healthy', 'Diabetic'],
)
# Bar chart of the two class sizes.
h_diab.plot.bar(alpha=0.7)
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 3/8
6/27/23, 4:21 PM SVM_RF_Diabetes_CSV_26/6/2023.ipynb - Colaboratory
<Axes: >
# Pie chart of the class shares; the second slice ('Diabetic') is pulled out.
h_diab.plot.pie(
    colors=['C3', 'C4'],
    autopct='%1.1f%%',
    explode=[0, 0.1],
    startangle=90,
)
plt.gca().set_title('Relative % of females diabetic ')
# Blank out the y-axis label.
plt.gca().set_ylabel('')
# Equal axis scaling keeps the pie circular.
_ = plt.gca().axis('equal')
df.isnull().sum()
Pregnancies 0
Glucose 5
BloodPressure 35
SkinThickness 227
Insulin 374
BMI 11
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64
df.pivot_table(index=['Outcome'] )
Outcome
def replace_null_values(df):
    """Fill missing values in place with the median of the row's Outcome class.

    For every column, a null in a row whose ``Outcome`` is 0 is replaced by the
    median of that column over the ``Outcome == 0`` rows, and likewise for
    ``Outcome == 1``. The per-column null counts are printed once at the end.

    Mutates ``df``; returns ``None``.
    """
    for column in df.columns:
        for outcome in (0, 1):
            in_class = df['Outcome'] == outcome
            class_median = df.loc[in_class, column].median()
            df.loc[in_class & df[column].isnull(), column] = class_median
    print(df.isnull().sum())
replace_null_values(df)
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 4/8
6/27/23, 4:21 PM SVM_RF_Diabetes_CSV_26/6/2023.ipynb - Colaboratory
Pregnancies 0
Glucose 0
BloodPressure 0
SkinThickness 0
Insulin 0
BMI 0
DiabetesPedigreeFunction 0
Age 0
Outcome 0
dtype: int64
def create_new_bmi(df):
    """Replace the numeric ``BMI`` column with a categorical ``NEW_BMI_CAT``.

    Bands (lower bound inclusive, upper bound exclusive):

    * ``BMI < 18.5``        -> "underweight"
    * ``18.5 <= BMI < 25``  -> "normal"
    * ``25 <= BMI < 30``    -> "overweight"
    * ``BMI >= 30``         -> "obese"

    The bands are contiguous and non-overlapping, so every non-null BMI gets
    exactly one label; rows with a missing BMI stay missing.

    Mutates ``df`` in place (adds ``NEW_BMI_CAT``, drops ``BMI``); returns None.
    """
    new_cat = "NEW_BMI_CAT"
    bmi = df['BMI']
    df.loc[bmi < 18.5, new_cat] = "underweight"
    # Inclusive lower bounds close the gaps at exactly 18.5 and 30, and starting
    # "overweight" at 25 (not 24) stops it overwriting "normal" for 24-25.
    df.loc[(bmi >= 18.5) & (bmi < 25), new_cat] = "normal"
    df.loc[(bmi >= 25) & (bmi < 30), new_cat] = "overweight"
    # No upper cap: values of 40 and above are labelled too.
    df.loc[bmi >= 30, new_cat] = "obese"
    df.drop('BMI', axis=1, inplace=True)
    df[new_cat] = df[new_cat].astype('category')
def create_new_glucose(df):
    """Add a categorical ``NEW_GLUCOSE_CAT`` column derived from ``Glucose``.

    Bands:

    * ``Glucose < 70``          -> "low"
    * ``70 <= Glucose <= 99``   -> "normal"
    * ``99 < Glucose < 126``    -> "high"
    * ``Glucose >= 126``        -> "very_high"

    The bands are contiguous, so the boundary values 70, 99 and 126 and values
    of 200 and above all receive a label. Rows with a missing Glucose stay
    missing.

    Mutates ``df`` in place; the ``Glucose`` column itself is kept. Returns None.
    """
    new_cat = "NEW_GLUCOSE_CAT"
    glucose = df['Glucose']
    df.loc[glucose < 70, new_cat] = "low"
    # Inclusive bounds so the band edges are not left unlabelled.
    df.loc[(glucose >= 70) & (glucose <= 99), new_cat] = "normal"
    df.loc[(glucose > 99) & (glucose < 126), new_cat] = "high"
    # No upper cap: readings of 200 and above are "very_high" as well.
    df.loc[glucose >= 126, new_cat] = "very_high"
    df[new_cat] = df[new_cat].astype('category')
def create_new_skinthickness(df):
    """Add a categorical ``NEW_SKIN_THICKNESS`` column derived from ``SkinThickness``.

    ``SkinThickness < 30`` -> "normal"; ``SkinThickness >= 30`` -> "highfat".
    Rows with a missing SkinThickness stay missing.

    Mutates ``df`` in place; returns None.
    """
    new_cat = "NEW_SKIN_THICKNESS"
    skin = df['SkinThickness']
    df.loc[skin < 30, new_cat] = "normal"
    # Split at 30 on both sides: a ">= 70" threshold would leave every value
    # from 30 to 69 without a label.
    df.loc[skin >= 30, new_cat] = "highfat"
    df[new_cat] = df[new_cat].astype('category')
def create_new_pregnancies(df):
    """Add a categorical ``NEW_PREGNANCIES`` column derived from ``Pregnancies``.

    * ``Pregnancies == 0``       -> "no_pregnancies"
    * ``0 < Pregnancies <= 4``   -> "std_pregnancies"
    * ``Pregnancies > 4``        -> "over_pregnancies"

    Mutates ``df`` in place; returns None.
    """
    new_cat = "NEW_PREGNANCIES"
    pregnancies = df['Pregnancies']
    df.loc[pregnancies == 0, new_cat] = "no_pregnancies"
    # Both comparisons must be parenthesised: `&` binds tighter than `<=`, so
    # `(a > 0) & a <= 4` is evaluated as `((a > 0) & a) <= 4`, which is true for
    # every row and overwrites the "no_pregnancies" label.
    df.loc[(pregnancies > 0) & (pregnancies <= 4), new_cat] = "std_pregnancies"
    df.loc[pregnancies > 4, new_cat] = "over_pregnancies"
    df[new_cat] = df[new_cat].astype('category')
# Derive the categorical column NEW_CIRCULATION_LEVEL from SkinThickness and
# BloodPressure, then drop SkinThickness. Mutates df in place; returns nothing.
# NOTE(review): the conditions below use `< 30` and `> 30` only, so rows with
# SkinThickness exactly 30 are not assigned a label by any of them - confirm
# whether `>= 30` was intended.
def create_circulation_level(df):
new_cat = "NEW_CIRCULATION_LEVEL"
# SkinThickness below 30 and BloodPressure below 80 -> "normal".
df.loc[(df['SkinThickness'] < 30) & (df['BloodPressure'] < 80), new_cat] = "normal"
# SkinThickness above 30 and BloodPressure at or above 80 -> "high_risk".
df.loc[(df['SkinThickness'] > 30) & (df['BloodPressure']>= 80), new_cat] = "high_risk"
# Exactly one of the two measurements on the high side -> third label.
# NOTE(review): this line is cut off in this export (the label string after
# `"me` is missing) - restore it from the original notebook before running.
df.loc[((df['SkinThickness']< 30) & (df['BloodPressure'] >=80)) | ((df['SkinThickness']> 30) & (df['BloodPressure'] <80)), new_cat] = "me
df[new_cat] = df[new_cat].astype('category')
# SkinThickness is removed once the derived column has been built.
df.drop('SkinThickness', axis=1, inplace=True)
def create_other_features(df):
    """Add two product features and drop two of their source columns, in place.

    ``PRE_AGE_CAT`` is ``Age * Pregnancies`` and ``INSULIN_GLUCOSE_CAT`` is
    ``Insulin * Glucose``. ``Pregnancies`` and ``Glucose`` are removed
    afterwards; ``Age`` and ``Insulin`` are kept. Returns None.
    """
    products = {
        'PRE_AGE_CAT': ('Age', 'Pregnancies'),
        'INSULIN_GLUCOSE_CAT': ('Insulin', 'Glucose'),
    }
    for feature, (left, right) in products.items():
        df[feature] = df[left] * df[right]
    df.drop(columns=['Pregnancies', 'Glucose'], inplace=True)
create_new_bmi(df)
create_new_glucose(df)
create_new_pregnancies(df)
create_new_skinthickness(df)
create_circulation_level(df)
create_other_features(df)
df
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 5/8
6/27/23, 4:21 PM SVM_RF_Diabetes_CSV_26/6/2023.ipynb - Colaboratory
# Feature matrix: every column of `result` except the target column.
# Assumes `result` and `df` share the same row index - TODO confirm.
X = result.drop(columns='Outcome')
# Target vector taken from `df`.
y = df['Outcome']
0.4558423058385518
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 6/8
6/27/23, 4:21 PM SVM_RF_Diabetes_CSV_26/6/2023.ipynb - Colaboratory
# Accuracy of `lg_predictions` against `y_test`.
print(accuracy_score(y_test, lg_predictions))
# NOTE(review): roc_auc_score receives the same `lg_predictions` as
# accuracy_score, i.e. presumably hard class labels, whereas the random-forest
# evaluation further down passes predict_proba scores. Confirm whether
# probability scores were intended here; the two AUC values are otherwise not
# computed on the same basis.
print(roc_auc_score(y_test, lg_predictions))
0.7922077922077922
0.7494949494949495
print(classification_report(y_test, lg_predictions))
# Grid search over `param_grid` for the `random_forest` estimator, using
# 3-fold cross-validation and recall as the selection metric; training-fold
# scores are recorded as well (return_train_score=True).
grid_search_rf = GridSearchCV(random_forest,
param_grid=param_grid,
cv=3,
scoring='recall',
return_train_score=True)
# Run the search on the training split.
grid_search_rf.fit(X_train, y_train)
grid_search_rf.best_params_
# Evaluate the fitted grid search on the test split: predict once, reuse twice.
rf_test_labels = grid_search_rf.predict(X_test)
print(classification_report(y_test, rf_test_labels))
print(recall_score(y_test, rf_test_labels))
# ROC AUC is computed from the second predict_proba column.
rf_test_scores = grid_search_rf.predict_proba(X_test)[:, 1]
print(roc_auc_score(y_test, rf_test_scores))
0.8181818181818182
0.9263544536271808
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 7/8
6/27/23, 4:21 PM SVM_RF_Diabetes_CSV_26/6/2023.ipynb - Colaboratory
https://colab.research.google.com/drive/1A0OglTKaWFaL81tRzcsF_pLPI4xvd0vA#scrollTo=kaIiTkNy4Wpc&printMode=true 8/8