
import matplotlib.pyplot as plt

import seaborn as sns

import pandas as pd

import numpy as np

df = pd.read_csv('C:/Users/sravan/Desktop/2021/GItam - mini/review2/5 kidney/kidney_disease.csv')

df.head()

df.info()

df.shape

df.describe()

df.drop('id', axis = 1, inplace = True)

# Encode the binary / categorical clinical attributes as numeric codes
df[['htn','dm','cad','pe','ane']] = df[['htn','dm','cad','pe','ane']].replace(to_replace={'yes':1,'no':0})

df[['rbc','pc']] = df[['rbc','pc']].replace(to_replace={'abnormal':1,'normal':0})

df[['pcc','ba']] = df[['pcc','ba']].replace(to_replace={'present':1,'notpresent':0})

df[['appet']] = df[['appet']].replace(to_replace={'good':1,'poor':0,'no':np.nan})

df['classification'] = df['classification'].replace(to_replace={'ckd':1.0,'ckd\t':1.0,'notckd':0.0,'no':0.0})

df.rename(columns={'classification':'class'},inplace=True)

df['pe'] = df['pe'].replace(to_replace='good',value=0) # Not having pedal edema is good

df['appet'] = df['appet'].replace(to_replace='no',value=0)

df['cad'] = df['cad'].replace(to_replace='\tno',value=0)

df['dm'] = df['dm'].replace(to_replace={'\tno':0,'\tyes':1,' yes':1, '':np.nan})

df.head()
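
# Note (added sketch, not part of the original listing): in the usual kidney_disease.csv
# a few measurement columns ('pcv', 'wc', 'rc') are read as strings because of stray tabs
# and '?' entries. Coercing them to numeric here turns those entries into NaN, so the
# dropna() step below removes them and the classifiers receive purely numeric data.
df[['pcv', 'wc', 'rc']] = df[['pcv', 'wc', 'rc']].apply(pd.to_numeric, errors='coerce')
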
df2 = df.dropna(axis=0)

df2['class'].value_counts()

ind_col = [col for col in df.columns if col != 'class']

dep_col = 'class'

X = df2[ind_col]  # use the NaN-free frame produced by dropna() above

y = df2[dep_col]

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 0)
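
# Optional sketch (an assumption, not in the original listing): KNN is distance-based, so
# unscaled features such as blood urea can dominate the metric. Standardising the features,
# with the scaler fitted on the training fold only to avoid leakage, often helps.
# The code below keeps using the unscaled X_train / X_test, as in the original.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)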

# Algorithm

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

knn = KNeighborsClassifier()

knn.fit(X_train, y_train)

knn_acc = accuracy_score(y_test, knn.predict(X_test))

print(f"Training Accuracy of KNN is {accuracy_score(y_train, knn.predict(X_train))}")

print(f"Test Accuracy of KNN is {knn_acc} \n")

print(f"Confusion Matrix :- \n{confusion_matrix(y_test, knn.predict(X_test))}\n")

print(f"Classification Report :- \n {classification_report(y_test, knn.predict(X_test))}")

from sklearn.ensemble import RandomForestClassifier

rd_clf = RandomForestClassifier(criterion = 'entropy', max_depth = 11, max_features = 'sqrt',  # 'sqrt' replaces 'auto', which recent scikit-learn no longer accepts
                                min_samples_leaf = 2, min_samples_split = 3, n_estimators = 130)

rd_clf.fit(X_train, y_train)

rd_clf_acc = accuracy_score(y_test, rd_clf.predict(X_test))

print(f"Training Accuracy of Random Forest Classifier is {accuracy_score(y_train,


rd_clf.predict(X_train))}")

print(f"Test Accuracy of Random Forest Classifier is {rd_clf_acc} \n")

print(f"Confusion Matrix :- \n{confusion_matrix(y_test, rd_clf.predict(X_test))}\n")

print(f"Classification Report :- \n {classification_report(y_test, rd_clf.predict(X_test))}")

from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()

gnb.fit(X_train, y_train)

gnb_acc = accuracy_score(y_test, gnb.predict(X_test))

print(f"Training Accuracy of NB is {accuracy_score(y_train, gnb.predict(X_train))}")

print(f"Test Accuracy of NB is {gnb_acc} \n")

print(f"Confusion Matrix :- \n{confusion_matrix(y_test, gnb.predict(X_test))}\n")

print(f"Classification Report :- \n {classification_report(y_test, gnb.predict(X_test))}")

from xgboost import XGBClassifier

xgb = XGBClassifier(objective = 'binary:logistic', learning_rate = 0.5, max_depth = 5,
                    n_estimators = 150)

xgb.fit(X_train, y_train)

xgb_acc = accuracy_score(y_test, xgb.predict(X_test))

print(f"Training Accuracy of XgBoost is {accuracy_score(y_train, xgb.predict(X_train))}")

print(f"Test Accuracy of XgBoost is {xgb_acc} \n")

print(f"Confusion Matrix :- \n{confusion_matrix(y_test, xgb.predict(X_test))}\n")

print(f"Classification Report :- \n {classification_report(y_test, xgb.predict(X_test))}")
