# Professional Documents
# Culture Documents
# SHUBHAM OJHA
# EDA
# LAB 4 - ANOVA
# HP
# 2023-01-30
# Lab 2 - 20BCE1205
# Question 1
# Eighteen observations in long format: the measurement is stored in `x` and
# the group label ("x", "y" or "z", six rows each) is stored in `y`.
df <- data.frame(
  x = c(
    82, 93, 61, 74, 69, 53, # group "x"
    62, 85, 94, 78, 71, 66, # group "y"
    64, 73, 87, 91, 56, 78  # group "z"
  ),
  y = rep(c("x", "y", "z"), each = 6)
)
df
## x y
## 1 82 x
## 2 93 x
## 3 61 x
## 4 74 x
## 5 69 x
## 6 53 x
## 7 62 y
## 8 85 y
## 9 94 y
## 10 78 y
## 11 71 y
## 12 66 y
## 13 64 z
## 14 73 z
## 15 87 z
## 16 91 z
## 17 56 z
## 18 78 z
library(dplyr)
##
## Attaching package: 'dplyr'
# Descriptive statistics for Question 1: for every level of `y`, the number
# of rows plus the mean and standard deviation of `x` (NAs ignored).
s <- df %>%
  group_by(y) %>%
  summarise(
    count = n(),
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE)
  )
s
## # A tibble: 3 × 4
## y count mean sd
## <chr> <int> <dbl> <dbl>
## 1 x 6 72 14.4
## 2 y 6 76 12.1
## 3 z 6 74.8 13.4
# Manual one-way ANOVA for Question 1, computed step by step from `df`.
#
# Groups are selected by their label in `df$y` instead of by hard-coded row
# ranges, so the computation no longer depends on row order or on every group
# having exactly six rows.
group_means <- tapply(df$x, df$y, mean)   # one mean per level of y
group_sizes <- tapply(df$x, df$y, length) # number of rows per level of y

ma <- group_means[["x"]]
mb <- group_means[["y"]]
mc <- group_means[["z"]]
ma
## [1] 72
mb
## [1] 76
mc
## [1] 74.83333

# Grand mean over all observations. Averaging the group means instead gives
# the same number only when every group has the same size.
meantotal <- mean(df$x)
meantotal
## [1] 74.27778

# Within-group (error) sum of squares: squared deviation of every observation
# from the mean of its own group.
sse <- sum((df$x - group_means[df$y])^2)
sse
## [1] 2660.833

# Between-group sum of squares: squared deviation of every group mean from
# the grand mean, weighted by the size of that group.
ssc <- sum(group_sizes * (group_means - meantotal)^2)
ssc
## [1] 50.77778

# Degrees of freedom: (number of groups - 1) between groups,
# (number of observations - number of groups) within groups.
dfb <- length(group_means) - 1
dfw <- nrow(df) - length(group_means)
dfb
## [1] 2
dfw
## [1] 15

# Mean squares and the F statistic (between-group over within-group).
msc <- ssc / dfb
mse <- sse / dfw
msc
## [1] 25.38889
mse
## [1] 177.3889
fstatistic <- msc / mse
fstatistic
## [1] 0.1431256

# Upper-tail critical value of the F distribution at the 5% level.
# fstatistic (0.143) is below fcritical (3.68), so equality of the group means
# is not rejected for this data set.
fcritical <- qf(0.05, dfb, dfw, lower.tail = FALSE)
fcritical
## [1] 3.68232
# Cross-check of the hand computation: fit the same one-way model with aov().
# The sums of squares and degrees of freedom printed below match `ssc`, `sse`,
# `dfb` and `dfw` from the manual calculation.
# The object is named `anova`, like the stats function; calls such as
# anova(model) still resolve to the function because R skips non-function
# objects when looking up a function name.
anova <- aov(formula = x ~ y, data = df)
anova
## Call:
## aov(formula = x ~ y, data = df)
##
## Terms:
## y Residuals
## Sum of Squares 50.7778 2660.8333
## Deg. of Freedom 2 15
##
## Residual standard error: 13.31874
## Estimated effects may be unbalanced
summary(anova)
# Question 2
# Plant Growth
# One-way ANOVA of `weight` across the levels of `group` in the built-in
# PlantGrowth data set.
df1 <- PlantGrowth

# Fixed seed so the random 10-row preview below is reproducible.
set.seed(1234)
dplyr::sample_n(df1, 10)
## weight group
## 1 6.15 trt2
## 2 3.83 trt1
## 3 5.29 trt2
## 4 5.12 trt2
## 5 4.50 ctrl
## 6 4.17 trt1
## 7 5.87 trt1
## 8 5.33 ctrl
## 9 5.26 trt2
## 10 4.61 ctrl
levels(df1$group)

# Per-group row count, mean and standard deviation of `weight`.
# Note: this reuses (overwrites) the name `s` from Question 1.
s <- df1 %>%
  group_by(group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
s
## # A tibble: 3 × 4
## group count mean sd
## <fct> <int> <dbl> <dbl>
## 1 ctrl 10 5.03 0.583
## 2 trt1 10 4.66 0.794
## 3 trt2 10 5.53 0.443
boxplot(weight ~ group, data = df1)
anova1 <- aov(weight ~ group, data = df1)
summary(anova1)

# Upper-tail critical F value at the 5% level. The degrees of freedom are
# taken from the factor and the fitted model (3 - 1 = 2 and 30 - 3 = 27 here)
# rather than typed in by hand, so they cannot drift from the model.
fcritical1 <- qf(
  0.05,
  nlevels(df1$group) - 1,
  df.residual(anova1),
  lower.tail = FALSE
)
fcritical1
## [1] 3.354131
# Poison Dataset
# One-way ANOVA of `time` across the three poison types.
# NOTE(review): the path below is absolute and specific to one machine; the
# script only runs where this exact file exists.
df2 <- read.csv("C:/Users/HP/Desktop/padhai/EDA/Lab/dataset-94158.csv")

# `poison` is read from the CSV as integer codes (1, 2, 3). Left numeric,
# aov() fits a single straight-line slope in `poison` (1 degree of freedom)
# instead of comparing the three group means, so it must be a factor.
df2$poison <- factor(df2$poison)

# Fixed seed so the random 10-row preview is reproducible.
set.seed(1234)
dplyr::sample_n(df2, 10)
levels(df2$poison)
## [1] "1" "2" "3"

# Per-group row count, mean and standard deviation of `time`.
# Note: this reuses (overwrites) the name `s` from the earlier analyses.
s <- df2 %>%
  group_by(poison) %>%
  summarise(
    count = n(),
    mean = mean(time, na.rm = TRUE),
    sd = sd(time, na.rm = TRUE)
  )
s
## # A tibble: 3 × 4
## poison count mean sd
## <fct> <int> <dbl> <dbl>
## 1 1 16 0.618 0.209
## 2 2 16 0.544 0.289
## 3 3 16 0.276 0.0623
boxplot(time ~ poison, data = df2)
anova2 <- aov(time ~ poison, data = df2)
summary(anova2)
# With `poison` as a factor the table has 2 degrees of freedom for poison and
# 45 for the residuals (3 groups, 48 rows). The table recorded previously
# (poison Df 1, Residuals Df 46, F value 20.67) came from the numeric fit and
# no longer applies. Re-run to regenerate the exact table; the group
# summaries above suggest an F value of roughly 11.8.

# Upper-tail critical F value at the 5% level, with the degrees of freedom
# taken from the factor and the fitted model instead of being hard-coded.
fcritical2 <- qf(
  0.05,
  nlevels(df2$poison) - 1,
  df.residual(anova2),
  lower.tail = FALSE
)
fcritical2
## [1] 3.204317
## [1] "For both datasets, F_statistic > F_critical. Therefore, there is a
## significant difference in the means of the different groups."