Professional Documents
Culture Documents
# Load the iris data set shipped with R and preview its first ten rows.
df <- datasets::iris
head(df, 10)
#' Flag each row of a data frame as training (0) or test (1).
#'
#' NOTE(review): the statement that computed `split` is not present in this
#' source, so the function as written returned the base `split()` function
#' rather than a row mask. This version draws a simple random sample of rows;
#' confirm whether the original intended stratified sampling.
#'
#' @param df Data frame whose rows are to be partitioned.
#' @param test_ratio Single number in [0, 1]: fraction of rows flagged as test.
#' @return Numeric vector of length `nrow(df)`; 1 marks a test row and 0 a
#'   training row.
train_test_split <- function(df, test_ratio) {
  stopifnot(
    is.numeric(test_ratio), length(test_ratio) == 1L,
    test_ratio >= 0, test_ratio <= 1
  )
  # Fixed seed (kept from the original) so the partition is reproducible.
  set.seed(1)
  n <- nrow(df)
  mask <- numeric(n)
  mask[sample.int(n, size = round(n * test_ratio))] <- 1
  mask
}
#' Euclidean distance between two observations.
#'
#' Only the first four elements of each argument are treated as features.
#' The square root is applied so the result is a true Euclidean distance
#' rather than a sum of squared differences; nearest-neighbour ordering is
#' unaffected because the square root is monotonic.
#'
#' @param train_row Vector or one-row data frame; elements 1 to 4 are numeric.
#' @param test_row Vector or one-row data frame; elements 1 to 4 are numeric.
#' @return A single non-negative number.
euclidean_dist <- function(train_row, test_row) {
  # unlist() flattens a one-row data frame into a plain numeric vector.
  train_features <- as.numeric(unlist(train_row[1:4]))
  test_features <- as.numeric(unlist(test_row[1:4]))
  sqrt(sum((test_features - train_features)^2))
}
Function to calculate the Euclidean distance of one test instance with respect to all the training instances
#' Distance from one test row to every training row, with class codes.
#'
#' Calls `euclidean_dist()` once per training row. Works for any number of
#' training rows, including zero, one or two; the earlier version seeded the
#' result with rows 1 and 2 and then looped over `3:n`, which misbehaves when
#' fewer than three rows are present.
#'
#' @param train_df Data frame of training rows; column 5 holds the class.
#' @param test_row The observation to compare against every training row.
#' @return Data frame with one row per training row and two columns:
#'   `distances` (value returned by `euclidean_dist()`) and `classes`
#'   (column 5 of `train_df` converted with `as.numeric()`).
calc_distances <- function(train_df, test_row) {
  row_ids <- seq_len(nrow(train_df))
  distances <- vapply(
    row_ids,
    function(i) euclidean_dist(train_df[i, ], test_row),
    numeric(1)
  )
  # For a factor column as.numeric() yields the integer level codes.
  classes <- as.numeric(train_df[[5]])
  data.frame(distances = distances, classes = classes)
}
Function to sort the distances and then select the majority class among the k training instances with the smallest distances
#' Predict the class of one test row by majority vote of its k nearest rows.
#'
#' Distances come from `calc_distances()`. The rows are ordered by ascending
#' distance and the classes of the first `k` are tallied. On a tie,
#' `which.max()` keeps the first entry of the tally, i.e. the smallest class
#' code.
#'
#' @param train_df Training data passed through to `calc_distances()`.
#' @param test_row The observation to classify.
#' @param k Number of neighbours to consult; must be at least 1. Values larger
#'   than the number of training rows use every training row.
#' @return The winning class code as a character string.
predict_class <- function(train_df, test_row, k) {
  stopifnot(is.numeric(k), length(k) == 1L, k >= 1)
  distances <- calc_distances(train_df, test_row)
  stopifnot(nrow(distances) >= 1)
  nearest <- distances[order(distances[, 1]), ]
  # Never index past the available rows.
  k <- min(as.integer(k), nrow(nearest))
  votes <- table(nearest[seq_len(k), 2])
  names(votes)[which.max(votes)]
}
#' Classify every row of a test set with k-nearest-neighbours.
#'
#' Applies `predict_class()` to each row of `test_df` in order. An empty test
#' set yields an empty list instead of failing on a `1:0` loop range.
#'
#' @param train_df Training data passed through to `predict_class()`.
#' @param test_df Data frame whose rows are to be classified.
#' @param k Number of neighbours passed through to `predict_class()`.
#' @return An unnamed list with one prediction per row of `test_df`.
knn <- function(train_df, test_df, k) {
  lapply(
    seq_len(nrow(test_df)),
    function(i) predict_class(train_df, test_df[i, ], k)
  )
}
#' Fraction of predictions that equal the true class.
#'
#' Both inputs are compared as character strings, so a factor of true labels
#' can be checked against a list or vector of predicted labels. A length
#' mismatch raises an error instead of being silently recycled.
#'
#' @param test_classes True classes (factor, character or numeric vector).
#' @param pred Predicted classes; a list of scalars or an atomic vector.
#' @return A single number between 0 and 1 (`NaN` when both inputs are empty).
calc_accuracy <- function(test_classes, pred) {
  actual <- as.character(test_classes)
  predicted <- as.character(unlist(pred))
  stopifnot(length(actual) == length(predicted))
  mean(actual == predicted)
}
Creating the train and test sets from the performed split
# Partition the data: rows flagged 0 become the training set and rows flagged
# 1 the test set (0.3 is passed as the test ratio). Then preview both sets.
split <- train_test_split(df, 0.3)
train_df <- df[split == 0, ]
test_df <- df[split == 1, ]
head(train_df, 5)
head(test_df, 5)
Function to change the classes from numeric to string in the predicted values
#' Translate numeric class codes into iris species names.
#'
#' Codes "1", "2" and "3" become "setosa", "versicolor" and "virginica".
#' Any other value (an `NA`, or a label that has already been translated) is
#' left untouched, so calling the function twice is harmless; the earlier
#' catch-all `else` branch turned every unrecognised value into "virginica".
#'
#' @param pred List or vector of predicted class codes.
#' @return `pred` with every recognised code replaced by its species name.
correct_classes <- function(pred) {
  code_labels <- c("1" = "setosa", "2" = "versicolor", "3" = "virginica")
  # Reduce each element to a single string so list and vector inputs are
  # handled the same way.
  codes <- vapply(pred, function(p) as.character(p)[1L], character(1))
  idx <- match(codes, names(code_labels))
  known <- !is.na(idx)
  pred[known] <- unname(code_labels[idx[known]])
  pred
}
# Classify every test row with k = 5, translate the numeric class codes into
# species names, and display the predictions.
pred <- knn(train_df, test_df, 5)
pred <- correct_classes(pred)
pred
1. 'setosa'
2. 'setosa'
3. 'setosa'
4. 'setosa'
5. 'setosa'
6. 'setosa'
7. 'setosa'
8. 'setosa'
9. 'setosa'
10. 'setosa'
11. 'setosa'
12. 'setosa'
13. 'setosa'
14. 'setosa'
15. 'versicolor'
16. 'versicolor'
17. 'versicolor'
18. 'versicolor'
19. 'versicolor'
20. 'versicolor'
21. 'versicolor'
22. 'versicolor'
23. 'versicolor'
24. 'virginica'
25. 'versicolor'
26. 'virginica'
27. 'versicolor'
28. 'versicolor'
29. 'versicolor'
30. 'versicolor'
31. 'versicolor'
32. 'versicolor'
33. 'virginica'
34. 'versicolor'
35. 'virginica'
36. 'virginica'
37. 'virginica'
38. 'virginica'
39. 'virginica'
40. 'virginica'
41. 'virginica'
42. 'virginica'
43. 'virginica'
44. 'virginica'
45. 'virginica'
# Share of test rows whose predicted species equals the true species.
calc_accuracy(test_df$Species, pred)
0.933333333333333
Perform kNN for odd k values from 3 to 31 and then compare the accuracies
# Evaluate kNN for every odd k from 3 to 31 and collect one accuracy per k.
k_values <- seq(3, 31, by = 2)
# Preallocate the result list rather than growing it on each iteration.
accuracies <- vector("list", length(k_values))
for (j in seq_along(k_values)) {
  preds <- knn(train_df, test_df, k_values[j])
  preds <- correct_classes(preds)
  accuracies[[j]] <- calc_accuracy(test_df$Species, preds)
}
accuracies
In [ ]: