Professional Documents
Culture Documents
NIM: H051171305
b. Output
> install.packages("MASS")
WARNING: Rtools is required to build R packages but is not currently installed.
Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/Acer/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/MASS_7.3-51.6.zip'
Content type 'application/zip' length 1185215 bytes (1.1 MB)
downloaded 1.1 MB
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/Acer/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
also installing the dependencies ‘DEoptimR’, ‘robustbase’, ‘mvtnorm’, ‘pcaPP’
> data(salmon)
> head(salmon)
Gender Freshwater Marine Origin
1 2 108 368 Alaskan
2 1 131 355 Alaskan
3 1 105 469 Alaskan
4 2 86 506 Alaskan
5 1 99 402 Alaskan
6 2 87 423 Alaskan
> salmon <- salmon[ , -1]
> summary(salmon)
Freshwater Marine Origin
Min. : 53.0 Min. :301.0 Alaskan :50
1st Qu.: 99.0 1st Qu.:367.0 Canadian:50
Median :117.5 Median :396.5
Mean :117.9 Mean :398.1
3rd Qu.:140.0 3rd Qu.:428.2
Max. :179.0 Max. :511.0
> # Sebaran Plot Data Alaska.
> alaska <- subset(salmon, salmon$Origin =="Alaskan") # Create an Alaskan fish subset.
> canada <- subset(salmon, salmon$Origin == "Canadian") # Create a Canadian fish subset.
> plot(alaska$Freshwater, alaska$Marine, pch = 20, col=2, xlim=c(50,200),
ylim=c(300, 550), main="Plot of Scale size of Salmon",
xlab="Freshwater scale diameter", ylab="Marine scale diameter")
> # Sebaran Plot Data Canada
> points(canada$Freshwater, canada$Marine, col=3, pch=15)
> legend("topright", legend =c("Alaskan Salmon", "Canadian Salmon"),
pch=c(20,15), col=c(2:3))
> # Metode Grafik QQ-Plot : Uji Multivariat Normal
> salmon2<-data.frame(salmon$Freshwater, salmon$Marine)
> X <- as.matrix(salmon2) # membentuk matriks n x p
> center <- colMeans(X) # titik pusat
> n <- nrow(X)
> p <- ncol(X)
> cov <- cov(X)
> d <- mahalanobis(X,center,cov) # Menghitung Jarak Mahalanobis
> qqplot(qchisq(ppoints(n),df=p),d,main="QQ-Plot untuk Data Salmon",
ylab="Jarak Mahalanobis")
> abline(a=0,b=1)
> # Analisis Diskriminan Linier
> lda1 <- lda(salmon$Origin~salmon$Freshwater+salmon$Marine , na.action =
"na.omit" )
> lda1
Call:
lda(salmon$Origin ~ salmon$Freshwater + salmon$Marine, na.action = "na.omit")
Group means:
salmon$Freshwater salmon$Marine
Alaskan 98.38 429.66
Canadian 137.46 366.62
Alaskan Canadian
Alaskan 44 6
Canadian 1 49
c. Grafik
2. Tutorial 6 Analisis Cluster
a. Syntax
library(tidyverse) # data manipulation
library(cluster) # clustering algorithms
library(factoextra) # clustering visualization
library(dendextend) # for comparing two dendrograms
# Drop incomplete rows from USArrests, then centre and scale every column
df <- scale(na.omit(USArrests))
head(df)
# Distance matrix: pairwise Euclidean distances between the rows of df
d <- dist(df, method = "euclidean")
# Hierarchical clustering with complete linkage
hc1 <- hclust(d, method = "complete" )
# Plot the dendrogram (smaller label text; negative hang makes the labels
# hang down from height 0)
plot(hc1, cex = 0.6, hang = -1)
# Same complete-linkage clustering computed with agnes()
hc2 <- agnes(df, method = "complete")
# Agglomerative coefficient
hc2$ac
# Linkage methods to compare; each element is named after itself so the
# coefficients printed below are labelled by method
m <- setNames(nm = c("average", "single", "complete", "ward"))

# Return the agglomerative coefficient of agnes() on df for linkage method x
ac <- function(x) {
  fit <- agnes(df, method = x)
  fit$ac
}

map_dbl(m, ac)
# Agglomerative clustering with Ward's method, followed by its dendrogram
hc3 <- agnes(df, method = "ward")
pltree(hc3, cex = 0.6, hang = -1, main = "Dendrogram of agnes")
# Divisive hierarchical clustering with diana()
hc4 <- diana(df)
# Divisive coefficient
hc4$dc
# Plot the dendrogram
pltree(hc4, cex = 0.6, hang = -1, main = "Dendrogram of diana")
# Ward's method (ward.D2) on the Euclidean distance matrix d
hc5 <- hclust(d, method = "ward.D2")
# Cut the tree into 4 groups
sub_grp <- cutree(hc5, k = 4)
# Number of members in each cluster
table(sub_grp)
# Attach the cluster labels to the raw data. The state names are stored as
# row names, which mutate() may drop, so move them into a "State" column
# first to keep every row identifiable.
USArrests %>%
  rownames_to_column(var = "State") %>%
  mutate(cluster = sub_grp) %>%
  head()
# Dendrogram with the 4 clusters outlined by coloured rectangles
plot(hc5, cex = 0.6)
rect.hclust(hc5, k = 4, border = 2:5)
b. Output
> library(tidyverse) # data manipulation
> library(cluster) # clustering algorithms
> library(factoextra) # clustering visualization
> library(dendextend) # for comparing two dendrograms
> df <- USArrests
> df <- na.omit(df)
> df <- scale(df)
> head(df)
Murder Assault UrbanPop Rape
Alabama 1.24256408 0.7828393 -0.5209066 -0.003416473
Alaska 0.50786248 1.1068225 -1.2117642 2.484202941
Arizona 0.07163341 1.4788032 0.9989801 1.042878388
Arkansas 0.23234938 0.2308680 -1.0735927 -0.184916602
California 0.27826823 1.2628144 1.7589234 2.067820292
Colorado 0.02571456 0.3988593 0.8608085 1.864967207
> # Matrix Jarak
> d <- dist(df, method = "euclidean")
> # Cluster Hirarki dengan Complete Linkage
> hc1 <- hclust(d, method = "complete" )
> # Plot dendrogram
> plot(hc1, cex = 0.6, hang = -1)
> # Menghitung dengan Agnes
> hc2 <- agnes(df, method = "complete")
> # Koefisien Agglomerative
> hc2$ac
[1] 0.8531583
> # methods to assess
> m <- c( "average", "single", "complete", "ward")
> names(m) <- c( "average", "single", "complete", "ward")
> # Fungsi untuk Menghitung Koefisien
> ac <- function(x)
+ {
+ agnes(df, method = x)$ac
+ }
> map_dbl(m, ac)
average single complete ward
0.7379371 0.6276128 0.8531583 0.9346210
> hc3 <- agnes(df, method = "ward")
> pltree(hc3, cex = 0.6, hang = -1, main = "Dendrogram of agnes")
> # Menghitung hierarchical clustering
> hc4 <- diana(df)
> # Koefisien Divise
> hc4$dc
[1] 0.8514345
> # plot dendrogram
> pltree(hc4, cex = 0.6, hang = -1, main = "Dendrogram of diana")
> # Ward's method
> hc5 <- hclust(d, method = "ward.D2" )
> # Cut tree into 4 groups
> sub_grp <- cutree(hc5, k = 4)
> # Number of members in each cluster
> table(sub_grp)
sub_grp
1 2 3 4
7 12 19 12
> USArrests %>% mutate(cluster = sub_grp) %>% head
Murder Assault UrbanPop Rape cluster
1 13.2 236 58 21.2 1
2 10.0 263 48 44.5 2
3 8.1 294 80 31.0 2
4 8.8 190 50 19.5 3
5 9.0 276 91 40.6 2
6 7.9 204 78 38.7 2
> plot(hc5, cex = 0.6)
> rect.hclust(hc5, k = 4, border = 2:5)
c. Grafik