You are on page 1 of 8

clustering

Ella kurnia

2022-11-27
library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(cluster)
library(ggplot2)
library(dplyr)

##
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':


##
## filter, lag

## The following objects are masked from 'package:base':


##
## intersect, setdiff, setequal, union

library(broom)
library(ggdendro)
library(readxl)
library(fmsb)
library(NbClust)
library(RColorBrewer)
library(gridExtra)

##
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':


##
## combine

# Load the clustering indicators from the Excel workbook.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this file exists.
clustering1 <- read_excel("C:/Users/Gadget House/Downloads/clustering1.xlsx")

# Work with a plain data frame so the province names can become row names.
data <- as.data.frame(clustering1)
# Fail early if the label column is missing instead of silently dropping
# whatever happens to be the first column.
stopifnot("Provinsi" %in% names(data))
# `[[` matches the column name exactly (`$` would allow partial matching).
rownames(data) <- data[["Provinsi"]]
# Remove the label column by name; drop = FALSE keeps a data frame even when
# only one indicator column remains.
data <- data[, names(data) != "Provinsi", drop = FALSE]
# View() opens an interactive viewer, so only call it in interactive sessions.
if (interactive()) {
  View(data)
}

# Exploratory plots of the indicators, before and after standardisation.
plot(data, col = "blue")
boxplot(data)
# scale() centres and scales every column by default, so all indicators are
# on a comparable scale from here on (the result is a numeric matrix).
data <- scale(data)
boxplot(data)

# Draw the histograms on a 2 x 2 grid. par() returns the previous settings;
# restoring them afterwards keeps later base-graphics plots from being drawn
# into the leftover grid cell.
old_par <- par(mfrow = c(2, 2))
hist(data[, 1], col = "red", main = "Indeks Pembangunan Manusia")
hist(data[, 2], col = "green", main = "Persentase Penduduk Miskin")
hist(data[, 3], col = "blue", main = "Persentase Pengangguran Terbuka")
par(old_par)
# Fix the RNG seed so the randomly chosen starting centres, and therefore the
# resulting cluster labels, are reproducible between runs.
set.seed(123)
km_out <- kmeans(data, centers = 4)

# Visualise the k-means partition.
fviz_cluster(object = km_out, data = data)


# Append each observation's k-means cluster label as an extra, unnamed column.
res <- cbind(data, km_out[["cluster"]])
head(res, n = 6)

## IPM P0 PT
## ACEH 0.20795838 0.8454411 0.4442299 1
## SUMATERA UTARA 0.16224339 -0.3243489 0.4607248 2
## SUMATERA BARAT 0.32732530 -0.7648710 0.5651924 2
## RIAU 0.40097723 -0.6741753 -0.5894496 2
## JAMBI 0.06827369 -0.4946347 -0.2210638 2
## SUMATERA SELATAN -0.28474761 0.3845586 -0.2815451 1

# Mean of every indicator within each k-means cluster; the last column of
# `res` holds the cluster labels and is used as the grouping key.
label_col <- ncol(res)
new <- aggregate(res[, -label_col], by = list(res[, label_col]), FUN = mean)

new

## Group.1 IPM P0 PT
## 1 1 0.1531004 0.4980209 -0.24470649
## 2 2 0.0942570 -0.7359680 -0.05357728
## 3 3 -1.3967224 1.4238577 -0.99540711
## 4 4 1.1247979 -0.7911543 1.82320245

# Radar chart of the per-cluster indicator means in `new`.
# One palette colour per cluster row instead of a hard-coded count.
# NOTE(review): brewer.pal() warns when asked for fewer than 3 colours.
coul <- brewer.pal(nrow(new), "RdBu")

colors_border <- coul
# Semi-transparent version of the same colours for the polygon fill.
colors_in <- alpha(coul, 0.3)
radarchart(
  new[, -1],        # drop Group.1, the cluster id column
  axistype = 0,
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  maxmin = FALSE,
  # custom polygon
  pcol = colors_border, pfcol = colors_in, plwd = 4, plty = 1,
  # custom the grid
  cglcol = "grey", cglty = 1, axislabcol = "black", cglwd = 0.8,
  # custom labels
  vlcex = 0.8
)
legend(
  x = 0.7, y = 1, legend = new$Group.1, bty = "n", pch = 20, col = colors_in,
  text.col = "grey", cex = 1.2, pt.cex = 3
)

# Agglomerative hierarchical clustering with average linkage on the distance
# matrix returned by dist() (default settings).
hc_average <- hclust(dist(data), method = "average")

# Dendrogram of the fitted tree. The title is kept as a single-line literal:
# a line break inside the quotes would be rendered as a two-line title.
ggdendrogram(hc_average, rotate = FALSE, size = 2) +
  labs(title = "Average Linkage")
# Cut the tree into four groups; `clust` maps each observation to its group.
clust <- cutree(hc_average, k = 4)
# Append the hierarchical cluster label to the data as a column named "clust".
hc_res <- cbind(data, clust = clust)
head(hc_res, n = 6)

## IPM P0 PT clust
## ACEH 0.20795838 0.8454411 0.4442299 1
## SUMATERA UTARA 0.16224339 -0.3243489 0.4607248 1
## SUMATERA BARAT 0.32732530 -0.7648710 0.5651924 1
## RIAU 0.40097723 -0.6741753 -0.5894496 1
## JAMBI 0.06827369 -0.4946347 -0.2210638 1
## SUMATERA SELATAN -0.28474761 0.3845586 -0.2815451 1

# Mean of every indicator within each hierarchical cluster; the last column of
# `hc_res` holds the cluster labels and is used as the grouping key.
hc_label_col <- ncol(hc_res)
new <- aggregate(
  hc_res[, -hc_label_col],
  by = list(hc_res[, hc_label_col]),
  FUN = mean
)

new

## Group.1 IPM P0 PT
## 1 1 -0.02501224 -0.1586903 -0.2278310
## 2 2 1.05559048 -0.7935605 2.0950932
## 3 3 2.24989458 0.3771549 -0.5124735
## 4 4 -1.94064612 2.3076784 -0.6480981

# Radar chart of the per-cluster indicator means in `new`.
# One palette colour per cluster row instead of a hard-coded count.
# NOTE(review): brewer.pal() warns when asked for fewer than 3 colours.
coul <- brewer.pal(nrow(new), "RdBu")

colors_border <- coul
# Semi-transparent version of the same colours for the polygon fill.
colors_in <- alpha(coul, 0.3)
radarchart(
  new[, -1],        # drop Group.1, the cluster id column
  axistype = 0,
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  maxmin = FALSE,
  # custom polygon
  pcol = colors_border, pfcol = colors_in, plwd = 4, plty = 1,
  # custom the grid
  cglcol = "grey", cglty = 1, axislabcol = "black", cglwd = 0.8,
  # custom labels
  vlcex = 0.8
)
legend(
  x = 0.7, y = 1, legend = new$Group.1, bty = "n", pch = 20, col = colors_in,
  text.col = "grey", cex = 1.2, pt.cex = 3
)

You might also like