# You are on page 1 of 3

import pandas as pd

import numpy as np

import sklearn

from sklearn.ensemble import AdaBoostClassifier

from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import confusion_matrix

# Load the training and test splits from their fixed locations.
train = pd.read_csv('/data/training/diabetes_train.csv')
test = pd.read_csv('/data/test/diabetes_test.csv')

#'''Write your code here....

# Split the training frame into the 'Outcome' label column (y) and
# every remaining column as features (x).
x = train.drop(columns='Outcome')
y = train['Outcome']

#.....ADABoost / GradientBoost.....

# Both classifiers are built with library defaults; the grid search further
# down tunes learning_rate and n_estimators.
ada = AdaBoostClassifier()

gdb = GradientBoostingClassifier()


def classification_metrics(cfs):
    """Return ``(accuracy, sensitivity, specificity)`` for a 2x2 confusion matrix.

    ``cfs`` must follow scikit-learn's layout: rows are the actual class,
    columns the predicted class, so ``cfs[0][0]`` is TN, ``cfs[0][1]`` FP,
    ``cfs[1][0]`` FN and ``cfs[1][1]`` TP.  The positive class is taken to be
    the one at index 1 (assumes Outcome is coded 0 = negative, 1 = positive
    -- TODO confirm against the data).

    The earlier inline version computed "sensitivity" from row 0 and
    "specificity" from row 1, i.e. the two were swapped; this helper uses the
    standard definitions.
    """
    tn, fp = cfs[0][0], cfs[0][1]
    fn, tp = cfs[1][0], cfs[1][1]
    acc = (tn + tp) / (tn + fp + fn + tp)
    sn = tp / (tp + fn)  # true-positive rate
    sp = tn / (tn + fp)  # true-negative rate
    return acc, sn, sp


def evaluate(model):
    """Fit ``model`` on the training split and score it on the test split.

    Reads the module-level ``x``, ``y`` and ``test``; ``model`` is fitted in
    place.  Returns ``(accuracy, sensitivity, specificity)`` as fractions.

    This replaces two copy-pasted snippets that were disabled by wrapping them
    in bare triple-quoted strings.  Like them, it is not run on import; call
    it explicitly when the numbers are needed, e.g. ``print(*evaluate(ada))``.
    """
    model.fit(x, y)
    y_pred = model.predict(test.drop('Outcome', axis=1))
    y_actual = test.Outcome
    cfs = confusion_matrix(y_actual, y_pred)
    return classification_metrics(cfs)

#.....GridSearch........

# The time limit was tight, so the search covers only a small range and was
# run in batches; the reported optimum may therefore be slightly off.

# The parameter grid is the same for both models, so build it once instead of
# on every loop iteration.
lr = np.arange(0.1, 1.0, 0.1)  # learning rates from 0.1 up to (excluding) 1.0
nt = np.arange(50, 300, 50)    # ensemble sizes from 50 up to (excluding) 300
para = {'learning_rate': lr, 'n_estimators': nt}

for model in [ada, gdb]:
    # Search over the model of the current iteration.  The previous code
    # always passed ``gdb`` here, so AdaBoost was never tuned and the
    # parameters printed next to it were really GradientBoosting's.
    clf = GridSearchCV(estimator=model, param_grid=para)
    clf.fit(x, y)
    print(model, clf.best_params_)

#.......

#'''

# Answer vector for the submission.  Presumably ordered as
# [learning_rate, n_estimators, accuracy %, sensitivity %, specificity %]
# -- TODO confirm against the expected output format.
# NOTE(review): the last two values were presumably taken from the script's
# own sn/sp computation; verify which of them is the true-positive rate
# before relying on their order.
#
# AdaBoost alternative (not written out): [0.1, 50, 75.75, 90.00, 49.38]
# The Gradient Boost figures below are the ones that get saved.
result = pd.DataFrame([0.1, 50, 77.49, 89.33, 55.56])

# Persist one value per row, without header row or index column.
result.to_csv('/code/output/output.csv', index=False, header=False)

# You might also like