You are on page 1 of 7

20BCE1205-Lab4.

SHUBHAM OJHA

EDA

LAB 4 - ANOVA
HP

2023-01-30
# Lab 4 - 20BCE1205

# Question 1

# Question 1 data: 18 observations in column x, labelled in column y with
# one of three groups ("x", "y", "z"), six observations per group.
df <- data.frame(
  x = c(
    82, 93, 61, 74, 69, 53, # group "x"
    62, 85, 94, 78, 71, 66, # group "y"
    64, 73, 87, 91, 56, 78  # group "z"
  ),
  y = rep(c("x", "y", "z"), each = 6)
)
df

## x y
## 1 82 x
## 2 93 x
## 3 61 x
## 4 74 x
## 5 69 x
## 6 53 x
## 7 62 y
## 8 85 y
## 9 94 y
## 10 78 y
## 11 71 y
## 12 66 y
## 13 64 z
## 14 73 z
## 15 87 z
## 16 91 z
## 17 56 z
## 18 78 z
library(dplyr)

##
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':


##
## filter, lag

## The following objects are masked from 'package:base':


##
## intersect, setdiff, setequal, union

# Per-group sample size, mean and standard deviation of x.
s <- df %>%
  group_by(y) %>%
  summarise(
    count = n(),
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE)
  )
s

## # A tibble: 3 × 4
## y count mean sd
## <chr> <int> <dbl> <dbl>
## 1 x 6 72 14.4
## 2 y 6 76 12.1
## 3 z 6 74.8 13.4

# Manual one-way ANOVA, step 1: group means, grand mean and the
# within-group (error) sum of squares.

# Select each group's observations by its label in df$y instead of by
# hard-coded row positions, so the means do not depend on row order.
ma <- mean(df$x[df$y == "x"])
mb <- mean(df$x[df$y == "y"])
mc <- mean(df$x[df$y == "z"])
ma

## [1] 72

mb

## [1] 76

mc

## [1] 74.83333

# Grand mean over all observations. Averaging the group means stored in `s`
# only gives the same value when every group has the same size, and `s` is
# reassigned further down in this script.
meantotal <- mean(df$x)
meantotal

## [1] 74.27778

# SSE: squared deviation of every observation from the mean of its own
# group. ave() returns, for each row, the mean of x within that row's group,
# which avoids index ranges such as 0:6 (R silently drops the 0 index).
sse <- sum((df$x - ave(df$x, df$y))^2)
sse

## [1] 2660.833
# SSC: between-group sum of squares; each of the three group means is
# weighted by 6, the number of observations per group.
ssc <- (6 * (ma - meantotal)^2) + (6 * (mb - meantotal)^2) +
  (6 * (mc - meantotal)^2)
ssc

## [1] 50.77778

# Degrees of freedom: between groups (3 groups - 1) and within groups
# (number of observations - 3 groups).
dfb <- 3 - 1
dfw <- nrow(df) - 3
dfb

## [1] 2

dfw

## [1] 15

# Mean squares: each sum of squares divided by its degrees of freedom.
msc <- ssc / dfb
mse <- sse / dfw
msc

## [1] 25.38889

mse

## [1] 177.3889

# F statistic: between-group mean square over within-group mean square.
fstatistic <- msc / mse
fstatistic

## [1] 0.1431256

# Upper-tail critical value of the F distribution at the 0.05 level.
fcritical <- qf(0.05, dfb, dfw, lower.tail = FALSE)
fcritical

## [1] 3.68232

# Conclusion for Question 1, printed as a single sentence.
# The message is assembled with paste() (single-space separator) so that no
# line break sits inside the string literal; an embedded line break would be
# printed as "\n" in the middle of the sentence.
print(paste(
  "SINCE F_STATISTIC < F_CRITICAL. Therefore, There is no significant",
  "difference in the mean of years"
))

## [1] "SINCE F_STATISTIC < F_CRITICAL. Therefore, There is no significant difference in the mean of years"

# One-way ANOVA of x by group y using aov(). The sums of squares and degrees
# of freedom in the recorded output below match the manually computed
# ssc / sse and dfb / dfw.
anova <- aov(formula = x ~ y, data = df)
anova

## Call:
## aov(formula = x ~ y, data = df)
##
## Terms:
## y Residuals
## Sum of Squares 50.7778 2660.8333
## Deg. of Freedom 2 15
##
## Residual standard error: 13.31874
## Estimated effects may be unbalanced
summary(anova)

## Df Sum Sq Mean Sq F value Pr(>F)


## y 2 50.8 25.39 0.143 0.868
## Residuals 15 2660.8 177.39

# Question 2

# Setting up the hypotheses (null and alternative):


# Null Hypothesis (H0: mean1 = mean2 = mean3)
# Alternative Hypothesis (Ha: at least one group mean differs from the others)

# Plant Growth

# Work on a copy of the built-in PlantGrowth data and preview 10 randomly
# sampled rows; the seed is fixed so the same rows are drawn on every run.
df1 <- PlantGrowth
set.seed(1234)
dplyr::sample_n(df1, size = 10)

## weight group
## 1 6.15 trt2
## 2 3.83 trt1
## 3 5.29 trt2
## 4 5.12 trt2
## 5 4.50 ctrl
## 6 4.17 trt1
## 7 5.87 trt1
## 8 5.33 ctrl
## 9 5.26 trt2
## 10 4.61 ctrl

# Group labels of the factor column.
levels(df1$group)

## [1] "ctrl" "trt1" "trt2"

# Per-group sample size, mean and standard deviation of weight.
s <- df1 %>%
  group_by(group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
s

## # A tibble: 3 × 4
## group count mean sd
## <fct> <int> <dbl> <dbl>
## 1 ctrl 10 5.03 0.583
## 2 trt1 10 4.66 0.794
## 3 trt2 10 5.53 0.443

# Box plot of weight for each group, followed by the one-way ANOVA of
# weight on group.
boxplot(weight ~ group, data = df1)
anova1 <- aov(weight ~ group, data = df1)
summary(anova1)

## Df Sum Sq Mean Sq F value Pr(>F)


## group 2 3.766 1.8832 4.846 0.0159 *
## Residuals 27 10.492 0.3886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Upper-tail critical F value at the 0.05 level for 2 and 27 degrees of
# freedom (the Df values reported in the anova1 summary).
fcritical1 <- qf(0.05, 2, 27, lower.tail = FALSE)
fcritical1

## [1] 3.354131

# Poison dataset
# NOTE(review): the path below is absolute and machine-specific; the script
# only runs where the CSV exists at exactly this location.
df2 <- read.csv(file = "C:/Users/HP/Desktop/padhai/EDA/Lab/dataset-94158.csv")
set.seed(1234)
dplyr::sample_n(df2, size = 10)

## time poison treat


## 1 0.76 1 C
## 2 0.72 1 B
## 3 0.37 3 B
## 4 0.45 1 D
## 5 0.38 2 D
## 6 0.22 3 A
## 7 0.36 2 A
## 8 0.71 1 D
## 9 0.31 3 D
## 10 0.43 1 A

# poison is read from the CSV as an integer column (values 1, 2, 3), and
# levels() on a non-factor returns NULL. Convert it to a factor here so the
# distinct poison groups are actually listed.
levels(factor(df2$poison))

## [1] "1" "2" "3"

# Per-poison sample size, mean and standard deviation of time.
s <- df2 %>%
  group_by(poison) %>%
  summarise(
    count = n(),
    mean = mean(time, na.rm = TRUE),
    sd = sd(time, na.rm = TRUE)
  )
s

## # A tibble: 3 × 4
## poison count mean sd
## <int> <int> <dbl> <dbl>
## 1 1 16 0.618 0.209
## 2 2 16 0.544 0.289
## 3 3 16 0.276 0.0623

# Box plot of time for each poison type.
boxplot(time ~ poison, data = df2)

# poison is stored as integers (1, 2, 3). Passed to aov() as-is, it is fitted
# as a single numeric slope (Df = 1) instead of as three groups, which does
# not test H0: mean1 = mean2 = mean3. Wrapping it in factor() gives the
# intended one-way ANOVA with 3 - 1 = 2 between-group degrees of freedom.
anova2 <- aov(time ~ factor(poison), data = df2)
summary(anova2)
# NOTE(review): the output recorded below predates the factor() fix (poison
# fitted as a numeric predictor, Df = 1); re-run the script to regenerate it.
## Df Sum Sq Mean Sq F value Pr(>F)
## poison 1 0.9316 0.9316 20.67 3.96e-05 ***
## Residuals 46 2.0735 0.0451
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Upper-tail critical F value at the 0.05 level. The degrees of freedom are
# taken from the data (number of poison groups - 1) and from the fitted model
# (residual degrees of freedom) rather than typed in by hand.
fcritical2 <- qf(
  0.05,
  nlevels(factor(df2$poison)) - 1,
  df.residual(anova2),
  lower.tail = FALSE
)
fcritical2

# NOTE(review): stale value, computed with 1 and 46 degrees of freedom.
## [1] 4.051749

# Conclusion for Question 2, printed as a single sentence.
# The message is assembled with paste() (single-space separator) so that no
# line break sits inside the string literal; an embedded line break would be
# printed as "\n" in the middle of the sentence.
print(paste(
  " FOR BOTH DATASET F_STATISTIC > F_CRITICAL. Therefore, There is a",
  "significant difference in the mean of different groups"
))

## [1] " FOR BOTH DATASET F_STATISTIC > F_CRITICAL. Therefore, There is a significant difference in the mean of different groups"

You might also like