Professional Documents
Culture Documents
Homework
Homework
#' Print the entropy impurity of two child nodes
#'
#' The original statements referenced `p` and `q` without an enclosing
#' function, although the file calls `impu(a, b)`; the definition is restored
#' here with that name and those two parameters.
#'
#' @param p Numeric vector of class counts in node 1.
#' @param q Numeric vector of class counts in node 2.
#' @return Invisibly, the named impurity of node 2 (the value of the final
#'   `print()` call). Both impurities are printed, node 1 first.
impu <- function(p, q) {
  stopifnot(is.numeric(p), is.numeric(q))

  # Shannon entropy in bits of a vector of class counts.
  # Zero counts are dropped before taking logs: 0 * log(0) evaluates to NaN
  # in R, whereas the entropy contribution of an empty class is 0.
  entropy_bits <- function(counts) {
    total <- sum(counts)
    if (isTRUE(total == 0)) {
      return(0)
    }
    probs <- counts[counts > 0] / total
    -sum(probs * log(probs, base = 2))
  }

  Impurity_1 <- c("Node1_impurity" = entropy_bits(p))
  Impurity_2 <- c("Node2_impurity" = entropy_bits(q))

  print(Impurity_1)
  print(Impurity_2)
}
# Example inputs: one count vector per node, then report both impurities.
a <- c(10, 1)
b <- c(4, 5)
impu(a, b)
Node1_impurity
0.439497
Node2_impurity
0.9910761
#' Gain ratio of a two-way split, computed from class counts
#'
#' Gain ratio = information gain / split information, where the information
#' gain is the parent entropy minus the size-weighted entropies of the two
#' child nodes, and the split information is the entropy of the node sizes.
#' All logarithms are base 2.
#'
#' @param p Numeric vector of class counts in child node 1.
#' @param q Numeric vector of class counts in child node 2, in the same class
#'   order and of the same length as `p`.
#' @return Invisibly, a length-one numeric vector named `"Gain_ratio"`; the
#'   value is also printed. The result is `NaN` when one node is empty, because
#'   the split information is then 0.
gainratio <- function(p, q) {
  stopifnot(is.numeric(p), is.numeric(q), length(p) == length(q))

  # Shannon entropy in bits of a vector of counts.
  # Zero counts are dropped before taking logs: 0 * log(0) evaluates to NaN
  # in R, whereas the entropy contribution of an empty class is 0.
  entropy_bits <- function(counts) {
    total <- sum(counts)
    if (isTRUE(total == 0)) {
      return(0)
    }
    probs <- counts[counts > 0] / total
    -sum(probs * log(probs, base = 2))
  }

  size_n1 <- sum(p)
  size_n2 <- sum(q)
  size_parent <- size_n1 + size_n2

  # The parent node holds the element-wise sum of the child class counts.
  parent_entropy <- entropy_bits(p + q)

  information_gain <- parent_entropy -
    (size_n1 / size_parent) * entropy_bits(p) -
    (size_n2 / size_parent) * entropy_bits(q)

  # Split information is the entropy of the partition sizes themselves.
  split_info <- entropy_bits(c(size_n1, size_n2))

  Gain_ratio <- c("Gain_ratio" = information_gain / split_info)
  print(Gain_ratio)
}
# Example inputs: one count vector per node, then report the gain ratio.
a <- c(10, 1)
b <- c(4, 5)
gainratio(a, b)
Gain_ratio 0.1949923
P(play=yes)=9/14
#' Conditional relative frequency of an attribute value given the class
#'
#' Prints the share of rows of the data frame `tennis` with `play == p` whose
#' attribute equals `q`. The original fragment had a closing brace but no
#' function header; the definition is restored with the name and two
#' positional parameters used by the call `tennisr("yes", "true")`.
#'
#' `tennis` is read from the calling environment and must provide the columns
#' `outlook`, `temperature`, `humidity`, `windy` and `play`.
#'
#' @param p Class label, `"yes"` or `"no"`.
#' @param q Attribute value: one of `"sunny"`, `"overcast"`, `"rainy"`,
#'   `"hot"`, `"mild"`, `"cold"`, `"high"`, `"normal"`, `"true"`, `"false"`.
#' @return Invisibly, the printed proportion; `NULL` (nothing printed) when
#'   `p` or `q` is not one of the recognised values.
tennisr <- function(p, q) {
  stopifnot(length(p) == 1L, length(q) == 1L)
  p <- as.character(p)
  q <- as.character(q)

  # Each recognised attribute value belongs to exactly one column of `tennis`.
  value_column <- c(
    sunny = "outlook", overcast = "outlook", rainy = "outlook",
    hot = "temperature", mild = "temperature", cold = "temperature",
    high = "humidity", normal = "humidity",
    true = "windy", false = "windy"
  )

  # Unrecognised inputs print nothing, as in the original chain of `if`s.
  if (!(p %in% c("yes", "no")) || !(q %in% names(value_column))) {
    return(invisible(NULL))
  }

  in_class <- tennis$play == p
  has_value <- tennis[[value_column[[q]]]] == q

  print(sum(in_class & has_value, na.rm = TRUE) / sum(in_class, na.rm = TRUE))
}
# Example: relative frequency of windy == "true" among the play == "yes" rows.
tennisr("yes", "true")
[1] 0.3333333
# Print the labelled values of postP_Y and postP_N on each of four passes.
# The loop index is not used inside the visible body, so every pass prints
# whatever the two variables hold at that moment.
for (i in seq_len(4)) {
  print("P(Y)=")
  print(postP_Y)
  print("P(N)=")
  print(postP_N)
}
3. 수업시간에 사용한 diabetes 자료를 이용하여 다음의 문제를 해결하시오.
$$\text{Information Gain} = \text{Entropy}(P) - \left\{ \frac{13}{20} \times \text{Entropy}(N_1) + \frac{7}{20} \times \text{Entropy}(N_2) \right\} = 1 - \left\{ \frac{13}{20} \times 0.890 + \frac{7}{20} \times 0.592 \right\} = 0.214$$
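$$\text{Information Gain} = \text{Entropy}(P) - \left\{ \frac{5}{20} \times \text{Entropy}(N_1) + \frac{10}{20} \times \text{Entropy}(N_2) + \frac{5}{20} \times \text{Entropy}(N_3) \right\} = 1 - \left\{ \frac{5}{20} \times 0.971 + \cdots \right\}$$
(the remainder of this expression is missing from the source text)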
$$\text{SplitINFO} = -\frac{5}{20}\log_2\frac{5}{20} - \frac{10}{20}\log_2\frac{10}{20} - \frac{5}{20}\log_2\frac{5}{20} = 1.5$$
B의 Gain Ratio가 더 크다
p1 p2 p3 p4 p5 p6
p1 0.0000 0.2357 0.2219 0.3688 0.3421 0.2348
distance1
p1 p2 p3 p4 p5
p2 0.2357
p3 0.2219 0.1483
# Agglomerative clustering of `distance1` under three linkage methods.
# Each dendrogram is drawn with hang = -1.

# Complete linkage
cresult <- hclust(distance1, method = "complete")
plot(cresult, hang = -1)

# Single linkage
sresult <- hclust(distance1, method = "single")
plot(sresult, hang = -1)

# Average linkage
aresult <- hclust(distance1, method = "average")
plot(aresult, hang = -1)