You are on page 1 of 6

Machine Learning Lab

Assessment-3
CSE4020
Reg.no:17BCE0918
Name:D.PENCHAL REDDY
Exercise 3 : Implement K – Nearest Neighbour Algorithm and evaluate the
performance of your algorithm using any data set from UCI repository.

Note: You can use the pandas API to load the data set. No other API can be
used in the implementation.

Code:

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

import seaborn as sns

import warnings

warnings.filterwarnings('ignore')

# Evaluation helpers used further down. The names are wrapped in parentheses
# so the import can span several lines; a bare line break after a trailing
# comma is a SyntaxError.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Read the car table from disk. info() prints a column summary; the bare
# head()/describe() expressions only show output when run as the last
# expression of a notebook cell.
car_data = pd.read_csv("cars.csv", encoding="ISO-8859-1")
car_data.info()
car_data.head()
car_data.describe()

# Feature matrix: the six attribute columns.
X = car_data[['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']]
X.head()

# Target, kept as a one-column DataFrame named 'class'.
y = car_data['class'].to_frame()
y.head()

# Hold out 30% of the rows for testing; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=32,
)

# Replace the text categories in the train/test frames with integer codes.
#
# Each entry is (frames to update, column name, value -> code mapping). The
# codes are exactly the ones the per-column loops used before; they are now
# applied by a single loop instead of five copy-pasted ones.
#
# NOTE(review): the codes are not in rank order (e.g. 'high' -> 0, 'low' -> 1,
# 'med' -> 2), so numeric distance between codes does not follow the natural
# ordering of the levels. Left unchanged so reported results stay the same.
# NOTE(review): 'doors' and 'persons' are not encoded here. This assumes
# cars.csv already stores them as numbers - confirm; text values such as
# '5more' or 'more' would make the classifier's fit() fail.
level_codes = {'high': 0, 'low': 1, 'med': 2, 'vhigh': 3}
column_encodings = [
    ([y_train, y_test], 'class', {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}),
    ([X_train, X_test], 'buying', level_codes),
    ([X_train, X_test], 'maint', level_codes),
    ([X_train, X_test], 'lug_boot', {'big': 0, 'small': 1, 'med': 2}),
    ([X_train, X_test], 'safety', level_codes),
]

for combine, column, classmapping in column_encodings:
    for dt in combine:
        # Map from the untouched car_data column (aligned on the row index)
        # so re-running this block gives the same result; values missing from
        # the mapping become NaN.
        dt[column] = car_data[column].map(classmapping)

y_train.head()
X_train.head()

# Fit a K-nearest-neighbours classifier with K = 37 and report its accuracy.
clf = KNeighborsClassifier(n_neighbors=37)

# y_train is a one-column DataFrame; flatten it so fit() receives the 1-D
# label array it expects instead of a column vector.
clf.fit(X_train, y_train.values.ravel())

y_pred = clf.predict(X_test)
y_pred
y_test.head()

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Accuracies as percentages rounded to two decimals. accuracy_knn recomputes
# the test accuracy from the predictions via accuracy_score.
accuracy_test_knn = round(clf.score(X_test, y_test) * 100, 2)
accuracy_train_knn = round(clf.score(X_train, y_train) * 100, 2)
accuracy_knn = round(accuracy_score(y_test, y_pred) * 100, 2)

print('Training accuracy of KNN', accuracy_train_knn)
print('Testing accuracy of KNN', accuracy_test_knn)
print('Accuracy of KNN', accuracy_knn)

# Draw the test-set confusion matrix as an annotated heatmap.
class_names = ['unacc', 'acc', 'good', 'vgood']

# labels= pins the matrix to the four encoded classes (0-3, in the same order
# as class_names), so it stays 4x4 even if a class is absent from the test
# split or the predictions.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1, 2, 3])
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

plt.figure(figsize=(5.5, 4))
# fmt='d' prints the integer counts as-is rather than in scientific notation.
sns.heatmap(cm_df, annot=True, fmt='d')
plt.title('KNN Accuracy:{0:.3f}'.format(accuracy_test_knn))
# confusion_matrix puts true classes on rows and predictions on columns, so
# the rows (y-axis) are the true labels and the columns (x-axis) the
# predicted ones. The second call used to be ylabel, which overwrote the
# first and left the x-axis unlabelled.
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Sweep K from 1 to 39 and plot the test-set misclassification rate for each.
k_values = range(1, 40)

# Flatten the one-column label frames to 1-D. Comparing the 1-D predictions
# against the (n, 1) array from y_test.values would broadcast to an n x n
# matrix and yield a meaningless "error rate".
train_labels = y_train.values.ravel()
test_labels = y_test.values.ravel()

error = []
for i in k_values:
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, train_labels)
    pred_i = knn.predict(X_test)
    # Fraction of test rows whose predicted class differs from the true one.
    error.append(np.mean(pred_i != test_labels))

plt.figure(figsize=(12, 6))
plt.plot(
    k_values,
    error,
    color='red',
    linestyle='dashed',
    marker='o',
    markerfacecolor='blue',
    markersize=10,
)
plt.title('Error rate K value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')
# Render the figure when run as a plain script, as the heatmap above does.
plt.show()
Output:

You might also like