You are on page 1 of 8

Nama: Zulfa Putri Asmawi

NIM: H051171305

Tugas Praktikum 3 tentang Analisis Diskriminan dan Clustering

1. Tutorial 5 Analisis Diskriminan


a. Syntax
library(MASS)
library(rrcov)
# Tutorial 5: linear discriminant analysis of the salmon scale data ----
#
# Uses the `salmon` dataset (columns Gender, Freshwater, Marine, Origin) made
# available by the packages loaded above. Steps: inspect the data, plot the
# two scale diameters by origin, check multivariate normality with a
# chi-square QQ plot, then fit an LDA and tabulate its resubstitution
# classification.

data(salmon)
head(salmon)
salmon <- salmon[, -1] # drop the first column (Gender); not used below
summary(salmon)

# Scatter plot: Alaskan fish first, Canadian fish overlaid.
alaska <- subset(salmon, Origin == "Alaskan") # Alaskan fish subset
canada <- subset(salmon, Origin == "Canadian") # Canadian fish subset
plot(
  alaska$Freshwater, alaska$Marine,
  pch = 20, col = 2,
  xlim = c(50, 200), ylim = c(300, 550),
  main = "Plot of Scale size of Salmon",
  xlab = "Freshwater scale diameter",
  ylab = "Marine scale diameter"
)
points(canada$Freshwater, canada$Marine, col = 3, pch = 15)
legend(
  "topright",
  legend = c("Alaskan Salmon", "Canadian Salmon"),
  pch = c(20, 15), col = 2:3
)

# Graphical check of multivariate normality: squared Mahalanobis distances
# are plotted against chi-square quantiles with p degrees of freedom.
salmon2 <- salmon[, c("Freshwater", "Marine")]
X <- as.matrix(salmon2) # n x p data matrix
center <- colMeans(X) # centroid
n <- nrow(X)
p <- ncol(X)
cov_mat <- cov(X) # named cov_mat so stats::cov() is not shadowed
d <- mahalanobis(X, center, cov_mat) # squared Mahalanobis distances
qqplot(
  qchisq(ppoints(n), df = p), d,
  main = "QQ-Plot untuk Data Salmon",
  ylab = "Jarak Mahalanobis"
)
abline(a = 0, b = 1) # reference line y = x

# Linear discriminant analysis.
# The model is specified with `data = salmon` rather than `salmon$...` terms,
# so the fitted object can be reused with predict(lda1, newdata = ...).
# Printed terms are therefore labelled Freshwater / Marine.
lda1 <- lda(Origin ~ Freshwater + Marine, data = salmon, na.action = na.omit)
lda1

# Resubstitution predictions and confusion matrix
# (rows = true origin, columns = predicted origin).
salmon1 <- predict(lda1)
confus_m <- table(Actual = salmon$Origin, Predicted = salmon1$class)
confus_m

b. Output
> install.packages("MASS")
WARNING: Rtools is required to build R packages but is not currently installed.
Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/Acer/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/MASS_7.3-51.6.zip'
Content type 'application/zip' length 1185215 bytes (1.1 MB)
downloaded 1.1 MB

package ‘MASS’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in


C:\Users\Acer\AppData\Local\Temp\RtmpG8v0c0\downloaded_packages
> install.packages("rrcov")
WARNING: Rtools is required to build R packages but is not currently installed.
Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/Acer/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
also installing the dependencies ‘DEoptimR’, ‘robustbase’, ‘mvtnorm’, ‘pcaPP’

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/DEoptimR_1.0-8.zip'


Content type 'application/zip' length 42098 bytes (41 KB)
downloaded 41 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/robustbase_0.93-6.zip'


Content type 'application/zip' length 3276715 bytes (3.1 MB)
downloaded 3.1 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/mvtnorm_1.1-0.zip'


Content type 'application/zip' length 267521 bytes (261 KB)
downloaded 261 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/pcaPP_1.9-73.zip'


Content type 'application/zip' length 529644 bytes (517 KB)
downloaded 517 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/rrcov_1.5-2.zip'


Content type 'application/zip' length 1699597 bytes (1.6 MB)
downloaded 1.6 MB

package ‘DEoptimR’ successfully unpacked and MD5 sums checked


package ‘robustbase’ successfully unpacked and MD5 sums checked
package ‘mvtnorm’ successfully unpacked and MD5 sums checked
package ‘pcaPP’ successfully unpacked and MD5 sums checked
package ‘rrcov’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in


C:\Users\Acer\AppData\Local\Temp\RtmpG8v0c0\downloaded_packages
> library(MASS)
> library(rrcov)
Loading required package: robustbase
Scalable Robust Estimators with High Breakdown Point (version 1.5-2)

> data(salmon)
> head(salmon)
Gender Freshwater Marine Origin
1 2 108 368 Alaskan
2 1 131 355 Alaskan
3 1 105 469 Alaskan
4 2 86 506 Alaskan
5 1 99 402 Alaskan
6 2 87 423 Alaskan
> salmon <- salmon[ , -1]
> summary(salmon)
Freshwater Marine Origin
Min. : 53.0 Min. :301.0 Alaskan :50
1st Qu.: 99.0 1st Qu.:367.0 Canadian:50
Median :117.5 Median :396.5
Mean :117.9 Mean :398.1
3rd Qu.:140.0 3rd Qu.:428.2
Max. :179.0 Max. :511.0
> # Sebaran Plot Data Alaska.
> alaska <- subset(salmon, salmon$Origin =="Alaskan") # Create a alaskan fish
subset.
> canada <- subset(salmon, salmon$Origin == "Canadian") # Create a canadian fish
subset.
> plot(alaska$Freshwater, alaska$Marine, pch = 20, col=2, xlim=c(50,200),
ylim=c(300, 550), main="Plot of Scale size of Salmon", xlab="Freshwater scale
diameter", ylab="Marine scale diameter")
> # Sebaran Plot Data Canada
> points(canada$Freshwater, canada$Marine, col=3, pch=15)
> legend("topright", legend =c("Alaskan Salmon", "Canadian Salmon"),
pch=c(20,15), col=c(2:3))
> # Metode Grafik QQ-Plot : Uji Multivariat Normal
> salmon2<-data.frame(salmon$Freshwater, salmon$Marine)
> X <- as.matrix(salmon2) # membentuk matriks n x p
> center <- colMeans(X) # titik pusat
> n <- nrow(X)
> p <- ncol(X)
> cov <- cov(X)
> d <- mahalanobis(X,center,cov) # Menghitung Jarak Mahalanobis
> qqplot(qchisq(ppoints(n),df=p),d,main="QQ-Plot untuk Data Salmon",ylab="Jarak
Mahalanobis")
> abline(a=0,b=1)
> # Analisis Diskriminan Linier
> lda1 <- lda(salmon$Origin~salmon$Freshwater+salmon$Marine , na.action =
"na.omit" )
> lda1
Call:
lda(salmon$Origin ~ salmon$Freshwater + salmon$Marine, na.action = "na.omit")

Prior probabilities of groups:


Alaskan Canadian
0.5 0.5

Group means:
salmon$Freshwater salmon$Marine
Alaskan 98.38 429.66
Canadian 137.46 366.62

Coefficients of linear discriminants:


LD1
salmon$Freshwater 0.04458572
salmon$Marine -0.01803856
> salmon1 <- predict(lda1)
> confus_m <- table(salmon$Origin, salmon1$class)
> confus_m

Alaskan Canadian
Alaskan 44 6
Canadian 1 49

c. Grafik
2. Tutorial 6 Analisis Cluster
a. Syntax
library(tidyverse) # data manipulation
library(cluster) # clustering algorithms
library(factoextra) # clustering visualization
library(dendextend) # for comparing two dendrograms
# Tutorial 6: hierarchical clustering of the USArrests data ----
#
# Standardises the variables, builds agglomerative (hclust / agnes) and
# divisive (diana) hierarchies, compares linkage methods by their
# agglomerative coefficient, and cuts the Ward tree into four groups.

df <- USArrests
df <- na.omit(df) # drop incomplete rows
df <- scale(df) # centre and scale every column
head(df)

# Distance matrix
d <- dist(df, method = "euclidean")

# Hierarchical clustering with complete linkage
hc1 <- hclust(d, method = "complete")

# Plot the dendrogram
plot(hc1, cex = 0.6, hang = -1)

# Same clustering computed with agnes()
hc2 <- agnes(df, method = "complete")

# Agglomerative coefficient
hc2$ac

# Linkage methods to assess; the vector is named by its own values so that
# map_dbl() returns a result labelled by method.
m <- c("average", "single", "complete", "ward")
m <- setNames(m, m)

# Agglomerative coefficient of agnes() on `df` for linkage method `x`.
# Reads `df` from the enclosing (global) environment.
ac <- function(x) {
  agnes(df, method = x)$ac
}
map_dbl(m, ac)

hc3 <- agnes(df, method = "ward")
pltree(hc3, cex = 0.6, hang = -1, main = "Dendrogram of agnes")

# Divisive hierarchical clustering
hc4 <- diana(df)

# Divisive coefficient
hc4$dc

# Plot the dendrogram
pltree(hc4, cex = 0.6, hang = -1, main = "Dendrogram of diana")

# Ward's method
hc5 <- hclust(d, method = "ward.D2")

# Cut the tree into 4 groups
sub_grp <- cutree(hc5, k = 4)

# Number of members in each cluster
table(sub_grp)

# Preview the cluster assignment next to the raw data. The state names live
# in the row names, which mutate() may discard, so they are moved into an
# explicit "State" column first to keep each row identifiable.
USArrests %>%
  rownames_to_column(var = "State") %>%
  mutate(cluster = sub_grp) %>%
  head()

# Dendrogram with the 4 clusters outlined
plot(hc5, cex = 0.6)
rect.hclust(hc5, k = 4, border = 2:5)

b. Output
> library(tidyverse) # data manipulation
> library(cluster) # clustering algorithms
> library(factoextra) # clustering visualization
> library(dendextend) # for comparing two dendrograms
> df <- USArrests
> df <- na.omit(df)
> df <- scale(df)
> head(df)
Murder Assault UrbanPop Rape
Alabama 1.24256408 0.7828393 -0.5209066 -0.003416473
Alaska 0.50786248 1.1068225 -1.2117642 2.484202941
Arizona 0.07163341 1.4788032 0.9989801 1.042878388
Arkansas 0.23234938 0.2308680 -1.0735927 -0.184916602
California 0.27826823 1.2628144 1.7589234 2.067820292
Colorado 0.02571456 0.3988593 0.8608085 1.864967207
> # Matrix Jarak
> d <- dist(df, method = "euclidean")
> # Cluster Hirarki dengan Complete Linkage
> hc1 <- hclust(d, method = "complete" )
> # Plot dendrogram
> plot(hc1, cex = 0.6, hang = -1)
> # Menghitung dengan Agnes
> hc2 <- agnes(df, method = "complete")
> # Koefisien Agglomerative
> hc2$ac
[1] 0.8531583
> # methods to assess
> m <- c( "average", "single", "complete", "ward")
> names(m) <- c( "average", "single", "complete", "ward")
> # Fungsi untuk Menghitung Koefisien
> ac <- function(x)
+ {
+ agnes(df, method = x)$ac
+ }
> map_dbl(m, ac)
average single complete ward
0.7379371 0.6276128 0.8531583 0.9346210
> hc3 <- agnes(df, method = "ward")
> pltree(hc3, cex = 0.6, hang = -1, main = "Dendrogram of agnes")
> # Menghitung hierarchical clustering
> hc4 <- diana(df)
> # Koefisien Divise
> hc4$dc
[1] 0.8514345
> # plot dendrogram
> pltree(hc4, cex = 0.6, hang = -1, main = "Dendrogram of diana")
> # Ward's method
> hc5 <- hclust(d, method = "ward.D2" )
> # Cut tree into 4 groups
> sub_grp <- cutree(hc5, k = 4)
> # Number of members in each cluster
> table(sub_grp)
sub_grp
1 2 3 4
7 12 19 12
> USArrests %>% mutate(cluster = sub_grp) %>% head
Murder Assault UrbanPop Rape cluster
1 13.2 236 58 21.2 1
2 10.0 263 48 44.5 2
3 8.1 294 80 31.0 2
4 8.8 190 50 19.5 3
5 9.0 276 91 40.6 2
6 7.9 204 78 38.7 2
> plot(hc5, cex = 0.6)
> rect.hclust(hc5, k = 4, border = 2:5)

c. Grafik

You might also like