# You are on page 1 of 1

#Kmeans clustering

library(caret)
library(ggplot2)

# Load the loan data set. The working directory is still switched here because
# code outside this section may rely on it.
setwd("E:\\dataset")

# Empty cells and the tokens "NA"/"NAN" are read as missing values; text
# columns become factors.
data1 <- read.csv(
  "loan prediction.csv",
  na.strings = c("NA", "NAN", ""),
  stringsAsFactors = TRUE
)

# Report how many values are missing across the whole table
sum(is.na(data1))

# Keep only the two clustering features and drop every row where either one
# is missing, because kmeans() stops with an error on NA input.
# Row names of the retained rows still refer to the original rows of data1.
data2 <- data1[c("ApplicantIncome", "LoanAmount")]
data2 <- data2[complete.cases(data2), ]

# Data standardization
# Learn a center-and-scale transform from the two selected features, then
# apply that same transform to them to obtain the clustering input.
preProcValues <- preProcess(
  x = data2,
  method = c("center", "scale")
)
dataKM <- predict(object = preProcValues, newdata = data2)

# Creating clusters
# Fixed seed so the random starting centers are reproducible.
set.seed(10)

# Five clusters; 500 random starts, of which kmeans() keeps the best fit.
clust <- kmeans(x = dataKM, centers = 5, nstart = 500)

# Attach each row's cluster label as a factor column and show cluster sizes.
dataKM[["Cluster"]] <- as.factor(clust[["cluster"]])
table(dataKM[["Cluster"]])

# Elbow method: total within-cluster sum of squares for k = 1..10.
# Cluster on the two standardized features only. By this point dataKM may
# also carry the factor column `Cluster`, and a previously assigned label
# must not be fed back into kmeans() as if it were a feature.
elbowFeatures <- dataKM[c("ApplicantIncome", "LoanAmount")]

k <- seq_len(10)
wss <- numeric(length(k))  # preallocated; one slot per candidate k
for (i in k) {
  # Same seed for every k so the curve is reproducible run to run
  set.seed(10)
  clust1 <- kmeans(elbowFeatures, centers = i)
  print(clust1$tot.withinss)
  wss[i] <- clust1$tot.withinss
}

# Look for the bend ("elbow") where adding clusters stops paying off
plot(k, wss, type = "b")

# Scatter plot of the two standardized features, one point per row,
# coloured by the cluster assigned to that row.
ggplot(
  data = dataKM,
  mapping = aes(x = ApplicantIncome, y = LoanAmount)
) +
  geom_point(mapping = aes(color = factor(Cluster)), size = 2)

# You might also like